author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2024-03-21 23:41:35 +0000
committer  Gerrit Code Review <noreply-gerritcodereview@google.com>  2024-03-21 23:41:35 +0000
commit     4d6f34f32c7828ab935beb54cdca08bd7bbd39c3 (patch)
tree       7aa355fd0b89ec0b2611e17ee84a14c6fa449e22
parent     7447ad8db319c34111a3d43b64b9485ac1a8115d (diff)
parent     50ea3b267729413aa561eaea5aac7e6fcd565101 (diff)
download   libyuv-androidx-fragment-release.tar.gz

Merge "Snap for 11610999 from 488a2af021e3e7473f083a9435b1472c0d411f3d to androidx-fragment-release" into androidx-fragment-release
-rw-r--r--  .clang-format (renamed from files/.clang-format)  0
-rw-r--r--  .gitignore (renamed from files/.gitignore)  0
-rw-r--r--  .gn (renamed from files/.gn)  4
-rw-r--r--  .vpython (renamed from files/.vpython)  0
-rw-r--r--  .vpython3 (renamed from files/.vpython3)  4
-rw-r--r--  AUTHORS (renamed from files/AUTHORS)  0
-rw-r--r--  Android.bp  179
-rw-r--r--  BUILD  14
-rw-r--r--  BUILD.gn (renamed from files/BUILD.gn)  77
-rw-r--r--  CM_linux_packages.cmake (renamed from files/CM_linux_packages.cmake)  4
-rw-r--r--  CMakeLists.txt (renamed from files/CMakeLists.txt)  26
-rw-r--r--  DEPS (renamed from files/DEPS)  1114
-rw-r--r--  DIR_METADATA (renamed from files/DIR_METADATA)  0
-rw-r--r--  LICENSE  2
-rw-r--r--  METADATA  23
-rw-r--r--  OWNERS  15
-rw-r--r--  OWNERS.android  1
-rw-r--r--  PATENTS (renamed from files/PATENTS)  0
-rw-r--r--  PRESUBMIT.py (renamed from files/PRESUBMIT.py)  0
-rw-r--r--  README.chromium (renamed from files/README.chromium)  5
-rw-r--r--  README.md (renamed from files/README.md)  1
-rw-r--r--  README.version  8
-rw-r--r--  UPDATING  36
-rw-r--r--  build_overrides/build.gni (renamed from files/build_overrides/build.gni)  3
-rw-r--r--  build_overrides/gtest.gni (renamed from files/build_overrides/gtest.gni)  0
-rw-r--r--  build_overrides/partition_alloc.gni  17
-rwxr-xr-x  cleanup_links.py (renamed from files/cleanup_links.py)  0
-rw-r--r--  codereview.settings  4
-rw-r--r--  docs/deprecated_builds.md (renamed from files/docs/deprecated_builds.md)  4
-rw-r--r--  docs/environment_variables.md (renamed from files/docs/environment_variables.md)  3
-rw-r--r--  docs/filtering.md (renamed from files/docs/filtering.md)  0
-rw-r--r--  docs/formats.md (renamed from files/docs/formats.md)  0
-rw-r--r--  docs/getting_started.md (renamed from files/docs/getting_started.md)  47
-rw-r--r--  docs/rotation.md (renamed from files/docs/rotation.md)  0
-rw-r--r--  download_vs_toolchain.py (renamed from files/download_vs_toolchain.py)  0
-rw-r--r--  files/Android.bp  196
-rw-r--r--  files/LICENSE  29
-rw-r--r--  files/codereview.settings  5
-rw-r--r--  files/public.mk  13
-rw-r--r--  files/source/compare_mmi.cc  123
-rw-r--r--  files/source/rotate_common.cc  106
-rw-r--r--  files/source/rotate_mmi.cc  291
-rw-r--r--  files/source/row_mmi.cc  7842
-rw-r--r--  files/source/scale_mmi.cc  1168
-rwxr-xr-x  files/tools_libyuv/autoroller/roll_deps.py  509
-rw-r--r--  include/libyuv.h (renamed from files/include/libyuv.h)  0
-rw-r--r--  include/libyuv/basic_types.h (renamed from files/include/libyuv/basic_types.h)  0
-rw-r--r--  include/libyuv/compare.h (renamed from files/include/libyuv/compare.h)  0
-rw-r--r--  include/libyuv/compare_row.h (renamed from files/include/libyuv/compare_row.h)  5
-rw-r--r--  include/libyuv/convert.h (renamed from files/include/libyuv/convert.h)  123
-rw-r--r--  include/libyuv/convert_argb.h (renamed from files/include/libyuv/convert_argb.h)  126
-rw-r--r--  include/libyuv/convert_from.h (renamed from files/include/libyuv/convert_from.h)  0
-rw-r--r--  include/libyuv/convert_from_argb.h (renamed from files/include/libyuv/convert_from_argb.h)  51
-rw-r--r--  include/libyuv/cpu_id.h (renamed from files/include/libyuv/cpu_id.h)  30
-rw-r--r--  include/libyuv/loongson_intrinsics.h (renamed from files/include/libyuv/loongson_intrinsics.h)  0
-rw-r--r--  include/libyuv/macros_msa.h (renamed from files/include/libyuv/macros_msa.h)  0
-rw-r--r--  include/libyuv/mjpeg_decoder.h (renamed from files/include/libyuv/mjpeg_decoder.h)  0
-rw-r--r--  include/libyuv/planar_functions.h (renamed from files/include/libyuv/planar_functions.h)  70
-rw-r--r--  include/libyuv/rotate.h (renamed from files/include/libyuv/rotate.h)  64
-rw-r--r--  include/libyuv/rotate_argb.h (renamed from files/include/libyuv/rotate_argb.h)  0
-rw-r--r--  include/libyuv/rotate_row.h (renamed from files/include/libyuv/rotate_row.h)  50
-rw-r--r--  include/libyuv/row.h (renamed from files/include/libyuv/row.h)  1148
-rw-r--r--  include/libyuv/scale.h (renamed from files/include/libyuv/scale.h)  55
-rw-r--r--  include/libyuv/scale_argb.h (renamed from files/include/libyuv/scale_argb.h)  0
-rw-r--r--  include/libyuv/scale_rgb.h (renamed from files/include/libyuv/scale_rgb.h)  0
-rw-r--r--  include/libyuv/scale_row.h (renamed from files/include/libyuv/scale_row.h)  187
-rw-r--r--  include/libyuv/scale_uv.h (renamed from files/include/libyuv/scale_uv.h)  0
-rw-r--r--  include/libyuv/version.h (renamed from files/include/libyuv/version.h)  2
-rw-r--r--  include/libyuv/video_common.h (renamed from files/include/libyuv/video_common.h)  0
-rw-r--r--  infra/config/OWNERS  3
-rw-r--r--  infra/config/PRESUBMIT.py (renamed from files/infra/config/PRESUBMIT.py)  2
-rw-r--r--  infra/config/README.md (renamed from files/infra/config/README.md)  0
-rw-r--r--  infra/config/codereview.settings (renamed from files/infra/config/codereview.settings)  0
-rw-r--r--  infra/config/commit-queue.cfg (renamed from files/infra/config/commit-queue.cfg)  0
-rw-r--r--  infra/config/cr-buildbucket.cfg (renamed from files/infra/config/cr-buildbucket.cfg)  401
-rw-r--r--  infra/config/luci-logdog.cfg (renamed from files/infra/config/luci-logdog.cfg)  0
-rw-r--r--  infra/config/luci-milo.cfg (renamed from files/infra/config/luci-milo.cfg)  0
-rw-r--r--  infra/config/luci-scheduler.cfg (renamed from files/infra/config/luci-scheduler.cfg)  0
-rwxr-xr-x  infra/config/main.star (renamed from files/infra/config/main.star)  45
-rw-r--r--  infra/config/project.cfg (renamed from files/infra/config/project.cfg)  2
-rw-r--r--  infra/config/realms.cfg (renamed from files/infra/config/realms.cfg)  4
-rw-r--r--  libyuv.gni (renamed from files/libyuv.gni)  8
-rw-r--r--  libyuv.gyp (renamed from files/libyuv.gyp)  0
-rw-r--r--  libyuv.gypi (renamed from files/libyuv.gypi)  0
-rw-r--r--  linux.mk (renamed from files/linux.mk)  6
-rw-r--r--  public.mk  2
-rw-r--r--  pylintrc (renamed from files/pylintrc)  0
-rwxr-xr-x  riscv_script/prepare_toolchain_qemu.sh  74
-rw-r--r--  riscv_script/riscv-clang.cmake  55
-rwxr-xr-x  riscv_script/run_qemu.sh  15
-rw-r--r--  source/compare.cc (renamed from files/source/compare.cc)  6
-rw-r--r--  source/compare_common.cc (renamed from files/source/compare_common.cc)  0
-rw-r--r--  source/compare_gcc.cc (renamed from files/source/compare_gcc.cc)  2
-rw-r--r--  source/compare_msa.cc (renamed from files/source/compare_msa.cc)  0
-rw-r--r--  source/compare_neon.cc (renamed from files/source/compare_neon.cc)  0
-rw-r--r--  source/compare_neon64.cc (renamed from files/source/compare_neon64.cc)  0
-rw-r--r--  source/compare_win.cc (renamed from files/source/compare_win.cc)  0
-rw-r--r--  source/convert.cc (renamed from files/source/convert.cc)  1018
-rw-r--r--  source/convert_argb.cc (renamed from files/source/convert_argb.cc)  1639
-rw-r--r--  source/convert_from.cc (renamed from files/source/convert_from.cc)  114
-rw-r--r--  source/convert_from_argb.cc (renamed from files/source/convert_from_argb.cc)  1129
-rw-r--r--  source/convert_jpeg.cc (renamed from files/source/convert_jpeg.cc)  0
-rw-r--r--  source/convert_to_argb.cc (renamed from files/source/convert_to_argb.cc)  0
-rw-r--r--  source/convert_to_i420.cc (renamed from files/source/convert_to_i420.cc)  0
-rw-r--r--  source/cpu_id.cc (renamed from files/source/cpu_id.cc)  109
-rw-r--r--  source/mjpeg_decoder.cc (renamed from files/source/mjpeg_decoder.cc)  4
-rw-r--r--  source/mjpeg_validate.cc (renamed from files/source/mjpeg_validate.cc)  0
-rw-r--r--  source/planar_functions.cc (renamed from files/source/planar_functions.cc)  751
-rw-r--r--  source/rotate.cc (renamed from files/source/rotate.cc)  437
-rw-r--r--  source/rotate_any.cc (renamed from files/source/rotate_any.cc)  0
-rw-r--r--  source/rotate_argb.cc (renamed from files/source/rotate_argb.cc)  25
-rw-r--r--  source/rotate_common.cc  198
-rw-r--r--  source/rotate_gcc.cc (renamed from files/source/rotate_gcc.cc)  130
-rw-r--r--  source/rotate_lsx.cc (renamed from files/source/rotate_lsx.cc)  0
-rw-r--r--  source/rotate_msa.cc (renamed from files/source/rotate_msa.cc)  0
-rw-r--r--  source/rotate_neon.cc (renamed from files/source/rotate_neon.cc)  40
-rw-r--r--  source/rotate_neon64.cc (renamed from files/source/rotate_neon64.cc)  71
-rw-r--r--  source/rotate_win.cc (renamed from files/source/rotate_win.cc)  0
-rw-r--r--  source/row_any.cc (renamed from files/source/row_any.cc)  852
-rw-r--r--  source/row_common.cc (renamed from files/source/row_common.cc)  915
-rw-r--r--  source/row_gcc.cc (renamed from files/source/row_gcc.cc)  679
-rw-r--r--  source/row_lasx.cc (renamed from files/source/row_lasx.cc)  406
-rw-r--r--  source/row_lsx.cc (renamed from files/source/row_lsx.cc)  1534
-rw-r--r--  source/row_msa.cc (renamed from files/source/row_msa.cc)  0
-rw-r--r--  source/row_neon.cc (renamed from files/source/row_neon.cc)  370
-rw-r--r--  source/row_neon64.cc (renamed from files/source/row_neon64.cc)  468
-rw-r--r--  source/row_rvv.cc  1394
-rw-r--r--  source/row_win.cc (renamed from files/source/row_win.cc)  65
-rw-r--r--  source/scale.cc (renamed from files/source/scale.cc)  716
-rw-r--r--  source/scale_any.cc (renamed from files/source/scale_any.cc)  16
-rw-r--r--  source/scale_argb.cc (renamed from files/source/scale_argb.cc)  330
-rw-r--r--  source/scale_common.cc (renamed from files/source/scale_common.cc)  220
-rw-r--r--  source/scale_gcc.cc (renamed from files/source/scale_gcc.cc)  5
-rw-r--r--  source/scale_lsx.cc (renamed from files/source/scale_lsx.cc)  0
-rw-r--r--  source/scale_msa.cc (renamed from files/source/scale_msa.cc)  0
-rw-r--r--  source/scale_neon.cc (renamed from files/source/scale_neon.cc)  39
-rw-r--r--  source/scale_neon64.cc (renamed from files/source/scale_neon64.cc)  134
-rw-r--r--  source/scale_rgb.cc (renamed from files/source/scale_rgb.cc)  0
-rw-r--r--  source/scale_rvv.cc  1040
-rw-r--r--  source/scale_uv.cc (renamed from files/source/scale_uv.cc)  391
-rw-r--r--  source/scale_win.cc (renamed from files/source/scale_win.cc)  0
-rwxr-xr-x  source/test.sh (renamed from files/source/test.sh)  0
-rw-r--r--  source/video_common.cc (renamed from files/source/video_common.cc)  0
-rw-r--r--  tools_libyuv/OWNERS  4
-rwxr-xr-x  tools_libyuv/autoroller/roll_deps.py  822
-rwxr-xr-x  tools_libyuv/autoroller/unittests/roll_deps_test.py (renamed from files/tools_libyuv/autoroller/unittests/roll_deps_test.py)  0
-rw-r--r--  tools_libyuv/autoroller/unittests/testdata/DEPS (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS)  0
-rw-r--r--  tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new)  0
-rw-r--r--  tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old (renamed from files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old)  0
-rwxr-xr-x  tools_libyuv/get_landmines.py (renamed from files/tools_libyuv/get_landmines.py)  0
-rw-r--r--  tools_libyuv/msan/OWNERS  3
-rw-r--r--  tools_libyuv/msan/blacklist.txt (renamed from files/tools_libyuv/msan/blacklist.txt)  0
-rw-r--r--  tools_libyuv/ubsan/OWNERS  3
-rw-r--r--  tools_libyuv/ubsan/blacklist.txt (renamed from files/tools_libyuv/ubsan/blacklist.txt)  0
-rw-r--r--  tools_libyuv/ubsan/vptr_blacklist.txt (renamed from files/tools_libyuv/ubsan/vptr_blacklist.txt)  0
-rw-r--r--  unit_test/basictypes_test.cc (renamed from files/unit_test/basictypes_test.cc)  0
-rw-r--r--  unit_test/color_test.cc (renamed from files/unit_test/color_test.cc)  0
-rw-r--r--  unit_test/compare_test.cc (renamed from files/unit_test/compare_test.cc)  0
-rw-r--r--  unit_test/convert_argb_test.cc (renamed from files/unit_test/convert_test.cc)  2625
-rw-r--r--  unit_test/convert_test.cc  2110
-rw-r--r--  unit_test/cpu_test.cc (renamed from files/unit_test/cpu_test.cc)  157
-rw-r--r--  unit_test/cpu_thread_test.cc (renamed from files/unit_test/cpu_thread_test.cc)  0
-rw-r--r--  unit_test/math_test.cc (renamed from files/unit_test/math_test.cc)  0
-rw-r--r--  unit_test/planar_test.cc (renamed from files/unit_test/planar_test.cc)  305
-rw-r--r--  unit_test/rotate_argb_test.cc (renamed from files/unit_test/rotate_argb_test.cc)  106
-rw-r--r--  unit_test/rotate_test.cc (renamed from files/unit_test/rotate_test.cc)  363
-rw-r--r--  unit_test/scale_argb_test.cc (renamed from files/unit_test/scale_argb_test.cc)  0
-rw-r--r--  unit_test/scale_plane_test.cc  470
-rw-r--r--  unit_test/scale_rgb_test.cc (renamed from files/unit_test/scale_rgb_test.cc)  0
-rw-r--r--  unit_test/scale_test.cc (renamed from files/unit_test/scale_test.cc)  478
-rw-r--r--  unit_test/scale_uv_test.cc (renamed from files/unit_test/scale_uv_test.cc)  79
-rw-r--r--  unit_test/testdata/arm_v7.txt (renamed from files/unit_test/testdata/arm_v7.txt)  0
-rw-r--r--  unit_test/testdata/juno.txt (renamed from files/unit_test/testdata/juno.txt)  0
-rw-r--r--  unit_test/testdata/mips.txt (renamed from files/unit_test/testdata/mips.txt)  0
-rw-r--r--  unit_test/testdata/mips_loongson2k.txt (renamed from files/unit_test/testdata/mips_loongson2k.txt)  0
-rw-r--r--  unit_test/testdata/mips_loongson3.txt (renamed from files/unit_test/testdata/mips_loongson3.txt)  0
-rw-r--r--  unit_test/testdata/mips_loongson_mmi.txt (renamed from files/unit_test/testdata/mips_loongson_mmi.txt)  0
-rw-r--r--  unit_test/testdata/mips_msa.txt (renamed from files/unit_test/testdata/mips_msa.txt)  0
-rw-r--r--  unit_test/testdata/riscv64.txt  4
-rw-r--r--  unit_test/testdata/riscv64_rvv.txt  4
-rw-r--r--  unit_test/testdata/riscv64_rvv_zvfh.txt  4
-rw-r--r--  unit_test/testdata/tegra3.txt (renamed from files/unit_test/testdata/tegra3.txt)  0
-rw-r--r--  unit_test/testdata/test0.jpg (renamed from files/unit_test/testdata/test0.jpg)  bin 421 -> 421 bytes
-rw-r--r--  unit_test/testdata/test1.jpg (renamed from files/unit_test/testdata/test1.jpg)  bin 735 -> 735 bytes
-rw-r--r--  unit_test/testdata/test2.jpg (renamed from files/unit_test/testdata/test2.jpg)  bin 685 -> 685 bytes
-rw-r--r--  unit_test/testdata/test3.jpg (renamed from files/unit_test/testdata/test3.jpg)  bin 704 -> 704 bytes
-rw-r--r--  unit_test/testdata/test4.jpg (renamed from files/unit_test/testdata/test4.jpg)  bin 701 -> 701 bytes
-rw-r--r--  unit_test/unit_test.cc (renamed from files/unit_test/unit_test.cc)  16
-rw-r--r--  unit_test/unit_test.h (renamed from files/unit_test/unit_test.h)  15
-rw-r--r--  unit_test/video_common_test.cc (renamed from files/unit_test/video_common_test.cc)  0
-rw-r--r--  util/Makefile (renamed from files/util/Makefile)  0
-rw-r--r--  util/color.cc (renamed from files/util/color.cc)  0
-rw-r--r--  util/compare.cc (renamed from files/util/compare.cc)  0
-rw-r--r--  util/cpuid.c (renamed from files/util/cpuid.c)  66
-rw-r--r--  util/i444tonv12_eg.cc (renamed from files/util/i444tonv12_eg.cc)  0
-rw-r--r--  util/psnr.cc (renamed from files/util/psnr.cc)  0
-rw-r--r--  util/psnr.h (renamed from files/util/psnr.h)  0
-rw-r--r--  util/psnr_main.cc (renamed from files/util/psnr_main.cc)  0
-rw-r--r--  util/ssim.cc (renamed from files/util/ssim.cc)  0
-rw-r--r--  util/ssim.h (renamed from files/util/ssim.h)  0
-rw-r--r--  util/yuvconstants.c (renamed from files/util/yuvconstants.c)  11
-rw-r--r--  util/yuvconvert.cc (renamed from files/util/yuvconvert.cc)  10
-rw-r--r--  winarm.mk (renamed from files/winarm.mk)  0
203 files changed, 20413 insertions(+), 16890 deletions(-)
diff --git a/files/.clang-format b/.clang-format
index 59d48705..59d48705 100644
--- a/files/.clang-format
+++ b/.clang-format
diff --git a/files/.gitignore b/.gitignore
index 20d679b7..20d679b7 100644
--- a/files/.gitignore
+++ b/.gitignore
diff --git a/files/.gn b/.gn
index a765caa5..f9a5ee6c 100644
--- a/files/.gn
+++ b/.gn
@@ -34,7 +34,5 @@ exec_script_whitelist = build_dotfile_settings.exec_script_whitelist +
default_args = {
mac_sdk_min = "10.12"
-
- # https://bugs.chromium.org/p/libyuv/issues/detail?id=826
- ios_deployment_target = "10.0"
+ ios_deployment_target = "12.0"
}
diff --git a/files/.vpython b/.vpython
index 4a64fd21..4a64fd21 100644
--- a/files/.vpython
+++ b/.vpython
diff --git a/files/.vpython3 b/.vpython3
index 0a9aa38b..28d819e7 100644
--- a/files/.vpython3
+++ b/.vpython3
@@ -76,8 +76,8 @@ wheel: <
version: "version:5.8.0.chromium.2"
>
wheel: <
- name: "infra/python/wheels/requests-py2_py3"
- version: "version:2.26.0"
+ name: "infra/python/wheels/requests-py3"
+ version: "version:2.31.0"
>
# Used by various python unit tests.
diff --git a/files/AUTHORS b/AUTHORS
index 28c08956..28c08956 100644
--- a/files/AUTHORS
+++ b/AUTHORS
diff --git a/Android.bp b/Android.bp
index e4ed511c..506184e0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,7 +1,6 @@
package {
default_applicable_licenses: ["external_libyuv_license"],
}
-
// Added automatically by a large-scale-change
// See: http://go/android-license-faq
license {
@@ -12,7 +11,183 @@ license {
],
license_text: [
"LICENSE",
+ "PATENTS",
],
}
-
subdirs = ["files"]
+
+cc_library {
+ name: "libyuv",
+ vendor_available: true,
+ product_available: true,
+ host_supported: true,
+
+ srcs: [
+ "source/compare.cc",
+ "source/compare_common.cc",
+ "source/compare_gcc.cc",
+ "source/compare_msa.cc",
+ "source/compare_neon.cc",
+ "source/compare_neon64.cc",
+ "source/convert.cc",
+ "source/convert_argb.cc",
+ "source/convert_from.cc",
+ "source/convert_from_argb.cc",
+ "source/convert_jpeg.cc",
+ "source/convert_to_argb.cc",
+ "source/convert_to_i420.cc",
+ "source/cpu_id.cc",
+ "source/mjpeg_decoder.cc",
+ "source/mjpeg_validate.cc",
+ "source/planar_functions.cc",
+ "source/rotate.cc",
+ "source/rotate_any.cc",
+ "source/rotate_argb.cc",
+ "source/rotate_common.cc",
+ "source/rotate_gcc.cc",
+ "source/rotate_msa.cc",
+ "source/rotate_neon.cc",
+ "source/rotate_neon64.cc",
+ "source/row_any.cc",
+ "source/row_common.cc",
+ "source/row_gcc.cc",
+ "source/row_msa.cc",
+ "source/row_neon.cc",
+ "source/row_neon64.cc",
+ "source/row_rvv.cc",
+ "source/scale.cc",
+ "source/scale_any.cc",
+ "source/scale_argb.cc",
+ "source/scale_common.cc",
+ "source/scale_gcc.cc",
+ "source/scale_msa.cc",
+ "source/scale_neon.cc",
+ "source/scale_neon64.cc",
+ "source/scale_rgb.cc",
+ "source/scale_rvv.cc",
+ "source/scale_uv.cc",
+ "source/video_common.cc",
+ ],
+
+ cflags: [
+ "-Wall",
+ "-Werror",
+ "-Wno-unused-parameter",
+ "-fexceptions",
+ "-DHAVE_JPEG",
+ "-DLIBYUV_UNLIMITED_DATA",
+ ],
+
+ arch: {
+ arm: {
+ cflags: ["-mfpu=neon"],
+ },
+ },
+
+ shared_libs: ["libjpeg"],
+
+ export_include_dirs: ["include"],
+
+ apex_available: [
+ "//apex_available:platform",
+ "com.android.media.swcodec",
+ "com.android.virt",
+ ],
+ min_sdk_version: "29",
+}
+
+// compatibility static library until all uses of libyuv_static are replaced
+// with libyuv (b/37646797)
+cc_library_static {
+ name: "libyuv_static",
+ vendor_available: true,
+ whole_static_libs: ["libyuv"],
+ apex_available: [
+ "//apex_available:platform",
+ "com.android.media.swcodec",
+ ],
+ min_sdk_version: "29",
+}
+
+cc_test {
+ name: "libyuv_unittest",
+ static_libs: ["libyuv"],
+ shared_libs: ["libjpeg"],
+ cflags: ["-Wall", "-Werror"],
+ srcs: [
+ "unit_test/basictypes_test.cc",
+ "unit_test/color_test.cc",
+ "unit_test/compare_test.cc",
+ "unit_test/convert_test.cc",
+ "unit_test/cpu_test.cc",
+ "unit_test/cpu_thread_test.cc",
+ "unit_test/math_test.cc",
+ "unit_test/planar_test.cc",
+ "unit_test/rotate_argb_test.cc",
+ "unit_test/rotate_test.cc",
+ "unit_test/scale_argb_test.cc",
+ "unit_test/scale_plane_test.cc",
+ "unit_test/scale_rgb_test.cc",
+ "unit_test/scale_test.cc",
+ "unit_test/scale_uv_test.cc",
+ "unit_test/unit_test.cc",
+ "unit_test/video_common_test.cc",
+ ],
+}
+
+cc_test {
+ name: "compare",
+ gtest: false,
+ srcs: [
+ "util/compare.cc",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "cpuid",
+ gtest: false,
+ srcs: [
+ "util/cpuid.c",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "i444tonv12_eg",
+ gtest: false,
+ srcs: [
+ "util/i444tonv12_eg.cc",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "psnr",
+ gtest: false,
+ srcs: [
+ "util/psnr_main.cc",
+ "util/psnr.cc",
+ "util/ssim.cc",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "yuvconstants",
+ gtest: false,
+ srcs: [
+ "util/yuvconstants.c",
+ ],
+ static_libs: ["libyuv"],
+}
+
+cc_test {
+ name: "yuvconvert",
+ gtest: false,
+ srcs: [
+ "util/yuvconvert.cc",
+ ],
+ static_libs: ["libyuv"],
+ shared_libs: ["libjpeg"],
+}
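
With the Android.bp changes above, libyuv is built directly from the top-level source/ tree as a shared library with exported headers, and libyuv_static survives only as a whole_static_libs compatibility wrapper. A minimal Soong consumer sketch (module and file names here are hypothetical, not part of this change):

    // Hypothetical consumer module: links the shared libyuv added above
    // instead of the legacy libyuv_static wrapper; headers come from its
    // export_include_dirs: ["include"].
    cc_binary {
        name: "yuv_example",
        srcs: ["yuv_example.cc"],
        shared_libs: ["libyuv"],
    }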
diff --git a/BUILD b/BUILD
deleted file mode 100644
index 3145e36a..00000000
--- a/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright 2011 Google Inc. All Rights Reserved.
-#
-# Description:
-# The libyuv package provides implementation yuv image conversion and
-# scaling.
-#
-# This library is used by Talk Video and WebRTC.
-#
-
-licenses(['notice']) # 3-clause BSD
-
-exports_files(['LICENSE'])
-
-package(default_visibility = ['//visibility:public'])
diff --git a/files/BUILD.gn b/BUILD.gn
index a72ff065..2c600b22 100644
--- a/files/BUILD.gn
+++ b/BUILD.gn
@@ -6,6 +6,7 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
+import("//build/config/features.gni")
import("//testing/test.gni")
import("libyuv.gni")
@@ -21,15 +22,25 @@ declare_args() {
config("libyuv_config") {
include_dirs = [ "include" ]
- if (is_android && current_cpu == "arm64") {
- ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
- }
- if (is_android && current_cpu != "arm64") {
- ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
+ if (is_android) {
+ if (target_cpu == "arm" || target_cpu == "x86" || target_cpu == "mipsel") {
+ ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
+ } else {
+ ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
+ }
}
-
+ defines = []
if (!libyuv_use_neon) {
- defines = [ "LIBYUV_DISABLE_NEON" ]
+ defines += [ "LIBYUV_DISABLE_NEON" ]
+ }
+ if (libyuv_disable_rvv) {
+ defines += [ "LIBYUV_DISABLE_RVV" ]
+ }
+ if (!libyuv_use_lsx) {
+ defines += [ "LIBYUV_DISABLE_LSX" ]
+ }
+ if (!libyuv_use_lasx) {
+ defines += [ "LIBYUV_DISABLE_LASX" ]
}
}
@@ -69,6 +80,14 @@ group("libyuv") {
deps += [ ":libyuv_msa" ]
}
+ if (libyuv_use_lsx) {
+ deps += [ ":libyuv_lsx" ]
+ }
+
+ if (libyuv_use_lasx) {
+ deps += [ ":libyuv_lasx" ]
+ }
+
if (!is_ios && !libyuv_disable_jpeg) {
# Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang
@@ -129,6 +148,7 @@ static_library("libyuv_internal") {
"source/row_any.cc",
"source/row_common.cc",
"source/row_gcc.cc",
+ "source/row_rvv.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
@@ -136,6 +156,7 @@ static_library("libyuv_internal") {
"source/scale_common.cc",
"source/scale_gcc.cc",
"source/scale_rgb.cc",
+ "source/scale_rvv.cc",
"source/scale_uv.cc",
"source/scale_win.cc",
"source/video_common.cc",
@@ -150,7 +171,7 @@ static_library("libyuv_internal") {
configs += [ "//build/config/gcc:symbol_visibility_default" ]
}
- if (!is_ios && !libyuv_disable_jpeg) {
+ if ((!is_ios || use_blink) && !libyuv_disable_jpeg) {
defines += [ "HAVE_JPEG" ]
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
@@ -229,6 +250,44 @@ if (libyuv_use_msa) {
}
}
+if (libyuv_use_lsx) {
+ static_library("libyuv_lsx") {
+ sources = [
+ # LSX Source Files
+ "source/rotate_lsx.cc",
+ "source/row_lsx.cc",
+ "source/scale_lsx.cc",
+ ]
+
+ cflags_cc = [
+ "-mlsx",
+ "-Wno-c++11-narrowing",
+ ]
+
+ deps = [ ":libyuv_internal" ]
+
+ public_configs = [ ":libyuv_config" ]
+ }
+}
+
+if (libyuv_use_lasx) {
+ static_library("libyuv_lasx") {
+ sources = [
+ # LASX Source Files
+ "source/row_lasx.cc",
+ ]
+
+ cflags_cc = [
+ "-mlasx",
+ "-Wno-c++11-narrowing",
+ ]
+
+ deps = [ ":libyuv_internal" ]
+
+ public_configs = [ ":libyuv_config" ]
+ }
+}
+
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
@@ -256,6 +315,7 @@ if (libyuv_include_tests) {
"unit_test/basictypes_test.cc",
"unit_test/color_test.cc",
"unit_test/compare_test.cc",
+ "unit_test/convert_argb_test.cc",
"unit_test/convert_test.cc",
"unit_test/cpu_test.cc",
"unit_test/cpu_thread_test.cc",
@@ -264,6 +324,7 @@ if (libyuv_include_tests) {
"unit_test/rotate_argb_test.cc",
"unit_test/rotate_test.cc",
"unit_test/scale_argb_test.cc",
+ "unit_test/scale_plane_test.cc",
"unit_test/scale_rgb_test.cc",
"unit_test/scale_test.cc",
"unit_test/scale_uv_test.cc",
diff --git a/files/CM_linux_packages.cmake b/CM_linux_packages.cmake
index 5f676f89..a073edfa 100644
--- a/files/CM_linux_packages.cmake
+++ b/CM_linux_packages.cmake
@@ -8,7 +8,7 @@ SET ( YUV_VER_MAJOR 0 )
SET ( YUV_VER_MINOR 0 )
SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} )
SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} )
-MESSAGE ( "Building ver.: ${YUV_VERSION}" )
+MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" )
# is this a 32-bit or 64-bit build?
IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
@@ -45,7 +45,7 @@ ELSE ()
SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" )
ENDIF ()
ENDIF ()
-MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" )
+MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" )
# define all the variables needed by CPack to create .deb and .rpm packages
SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" )
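
Switching these MESSAGE() calls to VERBOSE mode silences them at CMake's default log level; note that MESSAGE(VERBOSE ...) requires CMake 3.15 or newer, above the 2.8.12 minimum declared in CMakeLists.txt. A sketch of surfacing the messages again at configure time:

    # Raise the log level so the VERBOSE packaging messages print (CMake >= 3.15):
    cmake --log-level=VERBOSE -B out .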
diff --git a/files/CMakeLists.txt b/CMakeLists.txt
index d190507b..9abfa74b 100644
--- a/files/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@
PROJECT ( YUV C CXX ) # "C" is required even for C++ projects
CMAKE_MINIMUM_REQUIRED( VERSION 2.8.12 )
-OPTION( TEST "Built unit tests" OFF )
+OPTION( UNIT_TEST "Built unit tests" OFF )
SET ( ly_base_dir ${PROJECT_SOURCE_DIR} )
SET ( ly_src_dir ${ly_base_dir}/source )
@@ -37,22 +37,32 @@ if(WIN32)
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES IMPORT_PREFIX "lib" )
endif()
+# this creates the cpuid tool
+ADD_EXECUTABLE ( cpuid ${ly_base_dir}/util/cpuid.c )
+TARGET_LINK_LIBRARIES ( cpuid ${ly_lib_static} )
+
# this creates the conversion tool
ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} )
+# this creates the yuvconstants tool
+ADD_EXECUTABLE ( yuvconstants ${ly_base_dir}/util/yuvconstants.c )
+TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
-INCLUDE ( FindJPEG )
+find_package ( JPEG )
if (JPEG_FOUND)
include_directories( ${JPEG_INCLUDE_DIR} )
- target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
+ target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} )
add_definitions( -DHAVE_JPEG )
endif()
-if(TEST)
+if(UNIT_TEST)
find_library(GTEST_LIBRARY gtest)
if(GTEST_LIBRARY STREQUAL "GTEST_LIBRARY-NOTFOUND")
set(GTEST_SRC_DIR /usr/src/gtest CACHE STRING "Location of gtest sources")
+ if (CMAKE_CROSSCOMPILING)
+ set(GTEST_SRC_DIR third_party/googletest/src/googletest)
+ endif()
if(EXISTS ${GTEST_SRC_DIR}/src/gtest-all.cc)
message(STATUS "building gtest from sources in ${GTEST_SRC_DIR}")
set(gtest_sources ${GTEST_SRC_DIR}/src/gtest-all.cc)
@@ -61,7 +71,7 @@ if(TEST)
include_directories(${GTEST_SRC_DIR}/include)
set(GTEST_LIBRARY gtest)
else()
- message(FATAL_ERROR "TEST is set but unable to find gtest library")
+ message(FATAL_ERROR "UNIT_TEST is set but unable to find gtest library")
endif()
endif()
@@ -78,6 +88,12 @@ if(TEST)
if(NACL AND NACL_LIBC STREQUAL "newlib")
target_link_libraries(libyuv_unittest glibc-compat)
endif()
+
+ find_library(GFLAGS_LIBRARY gflags)
+ if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND")
+ target_link_libraries(libyuv_unittest gflags)
+ add_definitions(-DLIBYUV_USE_GFLAGS)
+ endif()
endif()
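
Since the diff above renames the option TEST to UNIT_TEST and allows gtest to be built from a source checkout when no prebuilt library is found, a typical configure-and-build sketch under the new spelling (build directory name illustrative):

    # Enable unit tests under the renamed option, then build everything:
    cmake -B out -DUNIT_TEST=ON .
    cmake --build out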
diff --git a/files/DEPS b/DEPS
index 3cf2dbe0..70ed1d58 100644
--- a/files/DEPS
+++ b/DEPS
@@ -5,43 +5,62 @@ gclient_gn_args = [
vars = {
'chromium_git': 'https://chromium.googlesource.com',
- 'chromium_revision': '829c6df33dce1085a61d8fd44209fc84bbf9a6a7',
- 'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94',
+ 'chromium_revision': 'af3d01376bec75a68f90160bfd38057d60510a2b',
+ 'gn_version': 'git_revision:fae280eabe5d31accc53100137459ece19a7a295',
+ # ninja CIPD package version.
+ # https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
+ 'ninja_version': 'version:2@1.11.1.chromium.6',
+ # reclient CIPD package version
+ 'reclient_version': 're_client_version:0.110.0.43ec6b1-gomaip',
# Keep the Chromium default of generating location tags.
'generate_location_tags': True,
+
+ # By default, download the fuchsia sdk from the public sdk directory.
+ 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/',
+ 'fuchsia_version': 'version:15.20230909.2.1',
+ # By default, download the fuchsia images from the fuchsia GCS bucket.
+ 'fuchsia_images_bucket': 'fuchsia',
+ 'checkout_fuchsia': False,
+ # Since the images are hundreds of MB, default to only downloading the image
+ # most commonly useful for developers. Bots and developers that need to use
+ # other images can override this with additional images.
+ 'checkout_fuchsia_boot_images': "terminal.qemu-x64,terminal.x64",
+ 'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
}
deps = {
'src/build':
- Var('chromium_git') + '/chromium/src/build' + '@' + 'dcea3443035f48d58193788e0bc56daca4e5db33',
+ Var('chromium_git') + '/chromium/src/build' + '@' + '5885d3c24833ad72845a52a1b913a2b8bc651b56',
'src/buildtools':
- Var('chromium_git') + '/chromium/src/buildtools' + '@' + '075dd7e22837a69189003e4fa84499acf63188cf',
+ Var('chromium_git') + '/chromium/src/buildtools' + '@' + '79ab87fa54614258c4c95891e873223371194525',
'src/testing':
- Var('chromium_git') + '/chromium/src/testing' + '@' + 'f4e42be13265ec304b0f3085eee2b15f30f44077',
+ Var('chromium_git') + '/chromium/src/testing' + '@' + '51e9a02297057cc0e917763a51e16680b7d16fb6',
'src/third_party':
- Var('chromium_git') + '/chromium/src/third_party' + '@' + '42c249feeb71bc0cd184849f0509aefef599343d',
+ Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667',
'src/buildtools/linux64': {
'packages': [
{
- 'package': 'gn/gn/linux-amd64',
+ 'package': 'gn/gn/linux-${{arch}}',
'version': Var('gn_version'),
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_linux',
+ 'condition': 'host_os == "linux"',
},
+
'src/buildtools/mac': {
'packages': [
{
- 'package': 'gn/gn/mac-amd64',
+ 'package': 'gn/gn/mac-${{arch}}',
'version': Var('gn_version'),
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_mac',
+ 'condition': 'host_os == "mac"',
},
+
'src/buildtools/win': {
'packages': [
{
@@ -50,43 +69,60 @@ deps = {
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_win',
+ 'condition': 'host_os == "win"',
},
- 'src/buildtools/clang_format/script':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '99876cacf78329e5f99c244dbe42ccd1654517a0',
- 'src/buildtools/third_party/libc++/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '79a2e924d96e2fc1e4b937c42efd08898fa472d7',
- 'src/buildtools/third_party/libc++abi/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '665b74f7d1b3bb295cd6ba7d8fcec1acd3d2ac84',
- 'src/buildtools/third_party/libunwind/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f51a154281bdfe746c46c07cd4fb05be97f9441d',
+ 'src/buildtools/reclient': {
+ 'packages': [
+ {
+ 'package': 'infra/rbe/client/${{platform}}',
+ 'version': Var('reclient_version'),
+ }
+ ],
+ 'dep_type': 'cipd',
+ },
'src/third_party/catapult':
- Var('chromium_git') + '/catapult.git' + '@' + '75423c310eb303d28978be892fcf7b9c2c824909',
+ Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d',
+ 'src/third_party/clang-format/script':
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
'src/third_party/colorama/src':
- Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
+ Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49',
+ 'src/third_party/cpu_features/src': {
+ 'url': Var('chromium_git') + '/external/github.com/google/cpu_features.git' + '@' + '936b9ab5515dead115606559502e3864958f7f6e',
+ 'condition': 'checkout_android',
+ },
'src/third_party/depot_tools':
- Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2ffa1bde797a8127c0f72908d0bd74051fd65d0d',
+ Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'd3e43dd4319ba169c0aaf44547eecf861f2fe5da',
'src/third_party/freetype/src':
- Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'cff026d41599945498044d2f4dcc0e610ffb6929',
+ Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '9e3c5d7e183c1a8d5ed8868d7d28ef18d3ec9ec8',
+ 'third_party/fuchsia-gn-sdk': {
+ 'url': Var('chromium_git') + '/chromium/src/third_party/fuchsia-gn-sdk.git' + '@' + '0d6902558d92fe3d49ba9a8f638ddea829be595b',
+ 'condition': 'checkout_fuchsia',
+ },
'src/third_party/googletest/src':
- Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'e2f3978937c0244508135f126e2617a7734a68be',
+ Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
'src/third_party/harfbuzz-ng/src':
- Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '64b29dbd5994a511acee69cb9b45ad650ef88359',
+ Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + 'db700b5670d9475cc8ed4880cc9447b232c5e432',
+ 'src/third_party/libc++/src':
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80',
+ 'src/third_party/libc++abi/src':
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8d21803b9076b16d46c32e2f10da191ee758520c',
+ 'src/third_party/libunwind/src':
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f1c687e0aaf0d70b9a53a150e9be5cb63af9215f',
'src/third_party/libjpeg_turbo':
- Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '02959c3ee17abacfd1339ec22ea93301292ffd56',
+ Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '30bdb85e302ecfc52593636b2f44af438e05e784',
'src/third_party/nasm':
- Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '9215e8e1d0fe474ffd3e16c1a07a0f97089e6224',
+ Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce',
'src/tools':
- Var('chromium_git') + '/chromium/src/tools' + '@' + '198dc879529652b39ba6e223bcc0bcad5f1facd6',
+ Var('chromium_git') + '/chromium/src/tools' + '@' + 'a76c0dbb64c603a0d45e0c6dfae3a351b6e1adf1',
# libyuv-only dependencies (not present in Chromium).
'src/third_party/gtest-parallel':
Var('chromium_git') + '/external/webrtc/deps/third_party/gtest-parallel' + '@' + '1dad0e9f6d82ff994130b529d7d814b40eb32b0e',
'src/third_party/lss': {
- 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + '92a65a8f5d705d1928874420c8d0d15bde8c89e5',
+ 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + 'ce877209e11aa69dcfffbd53ef90ea1d07136521',
'condition': 'checkout_android or checkout_linux',
},
@@ -101,14 +137,32 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/auto/src': {
- 'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + 'fe67d853d6356943dc79541c892ab6d3e6a7b61a',
- 'condition': 'checkout_android',
+
+ 'src/third_party/kotlin_stdlib': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/kotlin_stdlib',
+ 'version': 'Z1gsqhL967kFQecxKrRwXHbl-vwQjpv0l7PMUZ0EVO8C',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+ 'src/third_party/kotlinc/current': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/kotlinc',
+ 'version': 'Rr02Gf2EkaeSs3EhSUHhPqDHSd1AzimrM6cRYUJCPjQC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
},
+
'src/third_party/boringssl/src':
- 'https://boringssl.googlesource.com/boringssl.git' + '@' + '3a667d10e94186fd503966f5638e134fe9fb4080',
+ 'https://boringssl.googlesource.com/boringssl.git' + '@' + '20a06474c0b4a16779311bfe98ba69dc2402101d',
'src/base': {
- 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'e9e639622449a893a1b5e32781d072cec08ead72',
+ 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'd407b7061bce341bb6e11b539ea86c46c949ac4c',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@@ -131,20 +185,28 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_ndk': {
- 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '401019bf85744311b26c88ced255cd53401af8b7',
- 'condition': 'checkout_android',
+ 'src/third_party/android_toolchain': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_toolchain/android_toolchain',
+ 'version': 'R_8suM8m0oHbZ1awdxGXvKEFpAOETscbfZxkkMthyk8C',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
},
+
'src/third_party/androidx': {
'packages': [
{
'package': 'chromium/third_party/androidx',
- 'version': '6d8ij5pzYh29WWjPbdbAWFBJSA1nUgkWf2p6wCVZKIsC',
+ 'version': 'y7rF_rx56mD3FGhMiqnlbQ6HOqHJ95xUFNX1m-_a988C',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_support_test_runner': {
'packages': [
{
@@ -158,16 +220,12 @@ deps = {
'src/third_party/android_sdk/public': {
'packages': [
{
- 'package': 'chromium/third_party/android_sdk/public/build-tools/31.0.0',
- 'version': 'tRoD45SCi7UleQqSV7MrMQO1_e5P8ysphkCcj6z_cCQC',
+ 'package': 'chromium/third_party/android_sdk/public/build-tools/34.0.0',
+ 'version': 'YK9Rzw3fDzMHVzatNN6VlyoD_81amLZpN1AbmkdOd6AC',
},
{
'package': 'chromium/third_party/android_sdk/public/emulator',
- 'version': 'gMHhUuoQRKfxr-MBn3fNNXZtkAVXtOwMwT7kfx8jkIgC',
- },
- {
- 'package': 'chromium/third_party/android_sdk/public/extras',
- 'version': 'ppQ4TnqDvBHQ3lXx5KPq97egzF5X2FFyOrVHkGmiTMQC',
+ 'version': '9lGp8nTUCRRWGMnI_96HcKfzjnxEJKUcfvfwmA3wXNkC',
},
{
'package': 'chromium/third_party/android_sdk/public/patcher',
@@ -175,11 +233,15 @@ deps = {
},
{
'package': 'chromium/third_party/android_sdk/public/platform-tools',
- 'version': 'g7n_-r6yJd_SGRklujGB1wEt8iyr77FZTUJVS9w6O34C',
+ 'version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC',
},
{
- 'package': 'chromium/third_party/android_sdk/public/platforms/android-31',
- 'version': 'lL3IGexKjYlwjO_1Ga-xwxgwbE_w-lmi2Zi1uOlWUIAC',
+ 'package': 'chromium/third_party/android_sdk/public/platforms/android-34',
+ 'version': 'u-bhWbTME6u-DjypTgr3ZikCyeAeU6txkR9ET6Uudc8C',
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox',
+ 'version': 'YWMYkzyxGBgVsty0GhXL1oxbY0pGXQIgFc0Rh7ZMRPYC',
},
{
'package': 'chromium/third_party/android_sdk/public/sources/android-31',
@@ -187,7 +249,7 @@ deps = {
},
{
'package': 'chromium/third_party/android_sdk/public/cmdline-tools',
- 'version': 'Ez2NWws2SJYCF6qw2O-mSCqK6424l3ZdSTpppLyVR_cC',
+ 'version': 'EWnL2r7oV5GtE9Ef7GyohyFam42wtMtEKYU4dCb3U1YC',
},
],
'condition': 'checkout_android',
@@ -207,7 +269,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/android_build_tools/aapt2',
- 'version': 'version:3.6.0-alpha03-5516695-cr0',
+ 'version': 'STY0BXlZxsEhudnlXQFed-B5UpwehcoM0sYqor6qRqsC',
},
],
'condition': 'checkout_android',
@@ -223,6 +285,16 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+ 'src/third_party/byte_buddy/android_sdk_build_tools_25_0_2': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_sdk/public/build-tools',
+ 'version': 'kwIs2vdfTm93yEP8LG5aSnchN4BVEdVxbqQtF4XpPdkC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
'src/third_party/ced/src': {
'url': Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + 'ba412eaaacd3186085babcd901679a48863c7dd5',
'condition': 'checkout_android',
@@ -267,7 +339,7 @@ deps = {
},
'src/third_party/icu': {
- 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'bf66d373ae781a3498f2babe7b61d933dd774b82',
+ 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'e8c3bc9ea97d4423ad0515e5f1c064f486dae8b1',
},
'src/third_party/icu4j': {
'packages': [
@@ -293,11 +365,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/jdk',
- 'version': 'PfRSnxe8Od6WU4zBXomq-zsgcJgWmm3z4gMQNB-r2QcC',
- },
- {
- 'package': 'chromium/third_party/jdk/extras',
- 'version': 'fkhuOQ3r-zKtWEdKplpo6k0vKkjl-LY_rJTmtzFCQN4C',
+ 'version': 'GCFtf5t6M4HlrHj6NXedHbpHp2xjgognF8ptNci4478C',
},
],
'condition': 'checkout_android',
@@ -308,22 +376,31 @@ deps = {
'condition': 'checkout_android',
},
'src/third_party/junit/src': {
- 'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481',
+ 'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6',
'condition': 'checkout_android',
},
'src/third_party/libunwindstack': {
- 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '6868358481bb1e5e20d155c1084dc436c88b5e6b',
+ 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '4dbfa0e8c844c8e243b297bc185e54a99ff94f9e',
'condition': 'checkout_android',
},
+ 'src/third_party/ninja': {
+ 'packages': [
+ {
+ 'package': 'infra/3pp/tools/ninja/${{platform}}',
+ 'version': Var('ninja_version'),
+ }
+ ],
+ 'dep_type': 'cipd',
+ },
'src/third_party/mockito/src': {
- 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '04a2a289a4222f80ad20717c25144981210d2eac',
+ 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '7c3641bcef717ffa7d765f2c86b847d0aab1aac9',
'condition': 'checkout_android',
},
'src/third_party/objenesis': {
'packages': [
{
'package': 'chromium/third_party/objenesis',
- 'version': '9e367f55e5a65781ee77bfcbaa88fb82b30e75c0',
+ 'version': 'tknDblENYi8IaJYyD6tUahUyHYZlzJ_Y74_QZSz4DpIC',
},
],
'condition': 'checkout_android',
@@ -343,7 +420,20 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/r8',
- 'version': 'Nu_mvQJe34CotIXadFlA3w732CJ9EvQGuVs4udcZedAC',
+ 'version': 'O1BBWiBTIeNUcraX8STMtQXVaCleu6SJJjWCcnfhPLkC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+ # This duplication is intentional, so we avoid updating the r8.jar used by
+ # dexing unless necessary, since each update invalidates all incremental
+ # dexing and unnecessarily slows down all bots.
+ 'src/third_party/r8/d8': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/r8',
+ 'version': 'vw5kLlW3-suSlCKSO9OQpFWpR8oDnvQ8k1RgKNUapQYC',
},
],
'condition': 'checkout_android',
@@ -360,14 +450,14 @@ deps = {
'dep_type': 'cipd',
},
'src/third_party/requests/src': {
- 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'refs/tags/v2.23.0',
+ 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'c7e0fc087ceeadb8b4c84a0953a422c474093d6d',
'condition': 'checkout_android',
},
'src/third_party/robolectric': {
'packages': [
{
'package': 'chromium/third_party/robolectric',
- 'version': 'iC6RDM5EH3GEAzR-1shW_Mg0FeeNE5shq1okkFfuuNQC',
+ 'version': 'hzetqh1qFI32FOgQroZvGcGdomrgVBJ6WKRnl1KFw6EC',
},
],
'condition': 'checkout_android',
@@ -377,7 +467,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/sqlite4java',
- 'version': '889660698187baa7c8b0d79f7bf58563125fbd66',
+ 'version': 'LofjKH9dgXIAJhRYCPQlMFywSwxYimrfDeBmaHc-Z5EC',
},
],
'condition': 'checkout_android',
@@ -387,7 +477,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/turbine',
- 'version': 'Om6yIEXgJxuqghErK29h9RcMH6VaymMbxwScwXmcN6EC',
+ 'version': '2I2Nz480QsuCxpQ1lMfbigX8l5HAhX3_ykWU4TKRGo4C',
},
],
'condition': 'checkout_android',
@@ -400,1718 +490,1822 @@ deps = {
# iOS deps:
'src/ios': {
- 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '81826d980c159f949c2c7901f4dbec9a09788964',
+ 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + 'ddd58e86cf4ebdc0db60a5d0f3c323de49bb295c',
'condition': 'checkout_ios'
},
# Everything coming after this is automatically updated by the auto-roller.
# === ANDROID_DEPS Generated Code Start ===
-
+ # Generated by //third_party/android_deps/fetch_all.py
'src/third_party/android_deps/libs/android_arch_core_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_core_common',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_core_runtime': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_core_runtime',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_common_java8': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common_java8',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_livedata': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_livedata_core': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata_core',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_runtime': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_runtime',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_viewmodel': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_viewmodel',
- 'version': 'version:2@1.1.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent',
- 'version': 'version:2@3.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/classworlds_classworlds': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/classworlds_classworlds',
- 'version': 'version:2@1.1-alpha-2.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_animated_vector_drawable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_animated_vector_drawable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_appcompat_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_appcompat_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_asynclayoutinflater': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_asynclayoutinflater',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_cardview_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_cardview_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_collections': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_collections',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_coordinatorlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_coordinatorlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_cursoradapter': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_cursoradapter',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_customview': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_customview',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_design': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_design',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_documentfile': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_documentfile',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_drawerlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_drawerlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_interpolator': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_interpolator',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_loader': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_loader',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_localbroadcastmanager': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_localbroadcastmanager',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_multidex': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_multidex',
- 'version': 'version:2@1.0.0.cr0',
+ 'version': 'version:2@1.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_print': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_print',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_recyclerview_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_recyclerview_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_slidingpanelayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_slidingpanelayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_annotations',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_compat': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_compat',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_core_ui': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_ui',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_core_utils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_utils',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_fragment': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_fragment',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_media_compat': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_media_compat',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_v4': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_v4',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_vector_drawable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_vector_drawable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_swiperefreshlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_swiperefreshlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_transition': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_transition',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_versionedparcelable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_versionedparcelable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_viewpager': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_viewpager',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_tools_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_tools_common',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs': {
+
+ 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs',
- 'version': 'version:2@1.1.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration': {
+
+ 'src/third_party/android_deps/libs/com_android_tools_sdk_common': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration',
- 'version': 'version:2@1.1.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_android_tools_sdk_common',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': {
+
+ 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine',
+ 'version': 'version:2@2.8.8.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_sdk_common': {
+
+ 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_sdk_common',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': {
+
+ 'src/third_party/android_deps/libs/com_google_android_annotations': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine',
- 'version': 'version:2@2.8.8.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_annotations',
+ 'version': 'version:2@4.1.1.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': {
+
+ 'src/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms',
- 'version': 'version:2@1.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework',
+ 'version': 'version:2@4.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_datatransport_transport_api': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_datatransport_transport_api',
- 'version': 'version:2@2.2.1.cr0',
+ 'version': 'version:2@2.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@20.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@18.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_base': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_base',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_basement': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_basement',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging',
- 'version': 'version:2@16.0.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_android_gms_play_services_fido': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_fido',
- 'version': 'version:2@19.0.0-beta.cr0',
+ 'version': 'version:2@16.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_flags': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_flags',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_gcm': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_gcm',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_iid': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_iid',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_location': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_location',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@19.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_stats': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_stats',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_tasks': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_tasks',
- 'version': 'version:2@17.2.0.cr0',
+ 'version': 'version:2@18.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@20.1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@19.1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_material_material': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_material_material',
- 'version': 'version:2@1.6.0-alpha01.cr0',
+ 'version': 'version:2@1.7.0-alpha02.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_google_android_play_core': {
+
+ 'src/third_party/android_deps/libs/com_google_android_play_core_common': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core',
- 'version': 'version:2@1.10.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core_common',
+ 'version': 'version:2@2.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
+ 'src/third_party/android_deps/libs/com_google_android_play_feature_delivery': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_feature_delivery',
+ 'version': 'version:2@2.0.1.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
'src/third_party/android_deps/libs/com_google_auto_auto_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_auto_common',
- 'version': 'version:2@1.1.2.cr0',
+ 'version': 'version:2@1.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_service_auto_service': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service',
- 'version': 'version:2@1.0-rc6.cr0',
+ 'version': 'version:2@1.0-rc6.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations',
- 'version': 'version:2@1.0-rc6.cr0',
+ 'version': 'version:2@1.0-rc6.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations',
- 'version': 'version:2@1.7.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_code_findbugs_jformatstring': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jformatstring',
- 'version': 'version:2@3.0.0.cr0',
+ 'version': 'version:2@1.10.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_code_findbugs_jsr305': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jsr305',
- 'version': 'version:2@3.0.2.cr0',
+ 'version': 'version:2@3.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_code_gson_gson': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_code_gson_gson',
- 'version': 'version:2@2.8.0.cr0',
+ 'version': 'version:2@2.9.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_compiler': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_compiler',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_producers': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_producers',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_spi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_spi',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.18.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_core': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_core',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_javac': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac',
- 'version': 'version:2@9+181-r4173-1.cr0',
+ 'version': 'version:2@9+181-r4173-1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_javac_shaded': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac_shaded',
- 'version': 'version:2@9-dev-r4023-3.cr0',
+ 'version': 'version:2@9-dev-r4023-3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_annotations',
- 'version': 'version:2@16.0.0.cr0',
+ 'version': 'version:2@16.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_common',
- 'version': 'version:2@19.5.0.cr0',
+ 'version': 'version:2@19.5.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_components': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_components',
- 'version': 'version:2@16.1.0.cr0',
+ 'version': 'version:2@16.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders',
- 'version': 'version:2@16.1.0.cr0',
+ 'version': 'version:2@16.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json',
- 'version': 'version:2@17.1.0.cr0',
+ 'version': 'version:2@17.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_iid': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid',
- 'version': 'version:2@21.0.1.cr0',
+ 'version': 'version:2@21.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_installations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations',
- 'version': 'version:2@16.3.5.cr0',
+ 'version': 'version:2@16.3.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop',
- 'version': 'version:2@16.0.1.cr0',
+ 'version': 'version:2@16.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@18.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_messaging': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_messaging',
- 'version': 'version:2@21.0.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java',
- 'version': 'version:2@2.0.3.cr0',
+ 'version': 'version:2@21.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format',
- 'version': 'version:2@1.5.cr0',
+ 'version': 'version:2@1.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_failureaccess': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_failureaccess',
- 'version': 'version:2@1.0.1.cr0',
+ 'version': 'version:2@1.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_guava': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava',
- 'version': 'version:2@31.0-jre.cr0',
+ 'version': 'version:2@31.1-jre.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_guava_android': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava_android',
- 'version': 'version:2@31.0-android.cr0',
+ 'version': 'version:2@31.1-android.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_listenablefuture': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_listenablefuture',
- 'version': 'version:2@1.0.cr0',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations',
- 'version': 'version:2@1.3.cr0',
+ 'version': 'version:2@1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_protobuf_protobuf_java': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_java',
- 'version': 'version:2@3.4.0.cr0',
+ 'version': 'version:2@3.19.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite',
- 'version': 'version:2@3.13.0.cr0',
+ 'version': 'version:2@3.21.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils',
- 'version': 'version:2@1.3.0.cr0',
+ 'version': 'version:2@1.3.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_squareup_javapoet': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_javapoet',
- 'version': 'version:2@1.13.0.cr0',
+ 'version': 'version:2@1.13.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_squareup_javawriter': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_javawriter',
- 'version': 'version:2@2.1.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils',
- 'version': 'version:2@4.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api',
- 'version': 'version:2@1.3.2.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api',
- 'version': 'version:2@1.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/javax_inject_javax_inject': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject',
- 'version': 'version:2@1.cr0',
+ 'version': 'version:2@2.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/nekohtml_nekohtml': {
+
+ 'src/third_party/android_deps/libs/com_squareup_okio_okio_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/nekohtml_nekohtml',
- 'version': 'version:2@1.9.6.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm',
+ 'version': 'version:2@3.3.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/nekohtml_xercesminimal': {
+
+ 'src/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/nekohtml_xercesminimal',
- 'version': 'version:2@1.9.6.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm',
+ 'version': 'version:2@4.7.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': {
+
+ 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap',
- 'version': 'version:2@0.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils',
+ 'version': 'version:2@4.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/net_sf_kxml_kxml2': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/net_sf_kxml_kxml2',
- 'version': 'version:2@2.3.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_api',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_ant_ant': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_binder': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant',
- 'version': 'version:2@1.8.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_binder',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_ant_ant_launcher': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_context': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant_launcher',
- 'version': 'version:2@1.8.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_context',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_core': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks',
- 'version': 'version:2@2.1.3.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_core',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_stub': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_stub',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics': {
+
+ 'src/third_party/android_deps/libs/io_perfmark_perfmark_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_perfmark_perfmark_api',
+ 'version': 'version:2@0.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_model': {
+
+ 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_model',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api',
+ 'version': 'version:2@1.3.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry': {
+
+ 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_profile': {
+
+ 'src/third_party/android_deps/libs/javax_inject_javax_inject': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_profile',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject',
+ 'version': 'version:2@1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_project': {
+
+ 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_project',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy',
+ 'version': 'version:2@1.14.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata': {
+
+ 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent',
+ 'version': 'version:2@1.14.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_settings': {
+
+ 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_settings',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap',
+ 'version': 'version:2@0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file': {
+
+ 'src/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on',
+ 'version': 'version:2@1.72.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight': {
+
+ 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup',
+ 'version': 'version:2@1.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual',
+ 'version': 'version:2@2.5.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual',
+ 'version': 'version:2@3.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_util': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup',
- 'version': 'version:2@1.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_util',
+ 'version': 'version:2@3.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual',
- 'version': 'version:2@2.5.5.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone',
+ 'version': 'version:2@3.15.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': {
+
+ 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual',
- 'version': 'version:2@3.12.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations',
+ 'version': 'version:2@1.21.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': {
+
+ 'src/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone',
- 'version': 'version:2@3.15.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber',
+ 'version': 'version:2@2.5.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': {
+
+ 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations',
- 'version': 'version:2@1.17.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit',
+ 'version': 'version:2@4.4.1.201607150455-r.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default': {
+
+ 'src/third_party/android_deps/libs/org_hamcrest_hamcrest': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default',
- 'version': 'version:2@1.0-alpha-9-stable-1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_hamcrest_hamcrest',
+ 'version': 'version:2@2.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation',
- 'version': 'version:2@1.11.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7',
+ 'version': 'version:2@1.8.20.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils',
- 'version': 'version:2@1.5.15.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8',
+ 'version': 'version:2@1.8.20.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit',
- 'version': 'version:2@4.4.1.201607150455-r.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_annotations': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_annotations',
- 'version': 'version:2@13.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib',
- 'version': 'version:2@1.6.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common',
- 'version': 'version:2@1.6.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm',
+ 'version': 'version:2@0.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': {
+
+ 'src/third_party/android_deps/libs/org_jsoup_jsoup': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jsoup_jsoup',
+ 'version': 'version:2@1.15.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_android': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_android',
+ 'version': 'version:2@5.4.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_core': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core',
+ 'version': 'version:2@5.4.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_subclass': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass',
+ 'version': 'version:2@5.4.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': {
+
+ 'src/third_party/android_deps/libs/org_objenesis_objenesis': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm',
- 'version': 'version:2@0.1.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_objenesis_objenesis',
+ 'version': 'version:2@3.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_analysis': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_analysis',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_commons': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_commons',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_tree': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_tree',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_util': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_util',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_pcollections_pcollections': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_pcollections_pcollections',
- 'version': 'version:2@2.1.2.cr0',
+ 'version': 'version:2@3.1.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_annotations',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_junit': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_junit',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
+ 'src/third_party/android_deps/libs/org_robolectric_nativeruntime': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime',
+ 'version': 'version:2@4.10.3.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
+ 'src/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat',
+ 'version': 'version:2@1.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_pluginapi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_pluginapi',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_resources': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_resources',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_robolectric': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_robolectric',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_sandbox': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_sandbox',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadowapi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadowapi',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadows_framework': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_framework',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadows_playservices': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_playservices',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_utils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_utils_reflector': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils_reflector',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
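Every android_deps entry in the hunks above follows the same gclient DEPS schema, differing only in the checkout path and the pinned CIPD tag. A minimal sketch of that shape, using a hypothetical library name rather than any package from this change (gclient evaluates DEPS as Python, and a CIPD dep is only synced when its condition holds):

example_dep = {
    'src/third_party/android_deps/libs/some_library': {  # hypothetical path
        'packages': [
            {
                'package': 'chromium/third_party/android_deps/libs/some_library',
                # Tag format: 'version:2@<upstream version>.cr<chromium rev>'.
                # This change bumps every entry from .cr0 to .cr1, and raises
                # several upstream versions as well (e.g. Guava 31.0 -> 31.1).
                'version': 'version:2@1.0.0.cr1',
            },
        ],
        'condition': 'checkout_android',  # skipped on non-Android checkouts
        'dep_type': 'cipd',               # fetched from CIPD, not from git
    },
}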
@@ -2197,30 +2391,75 @@ hooks = [
'condition': 'checkout_mac',
},
{
- 'name': 'msan_chained_origins',
+ 'name': 'msan_chained_origins_focal',
+ 'pattern': '.',
+ 'condition': 'checkout_instrumented_libraries',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
+ ],
+ },
+ {
+ 'name': 'msan_no_origins_focal',
+ 'pattern': '.',
+ 'condition': 'checkout_instrumented_libraries',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
+ ],
+ },
+ {
+ 'name': 'msan_chained_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
- "--no_resume",
- "--no_auth",
- "--bucket", "chromium-instrumented-libraries",
- "-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1",
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
],
},
{
- 'name': 'msan_no_origins',
+ 'name': 'msan_no_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
- "--no_resume",
- "--no_auth",
- "--bucket", "chromium-instrumented-libraries",
- "-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1",
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
],
},
{
+ 'name': 'Download Fuchsia SDK from GCS',
+ 'pattern': '.',
+ 'condition': 'checkout_fuchsia',
+ 'action': [
+ 'python3',
+ 'src/build/fuchsia/update_sdk.py',
+ '--cipd-prefix={fuchsia_sdk_cipd_prefix}',
+ '--version={fuchsia_version}',
+ ],
+ },
+ {
+ 'name': 'Download Fuchsia system images',
+ 'pattern': '.',
+ 'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles',
+ 'action': [
+ 'python3',
+ 'src/build/fuchsia/update_product_bundles.py',
+ '{checkout_fuchsia_boot_images}',
+ ],
+ },
+ {
# Pull clang if needed or requested via GYP_DEFINES.
# Note: On Win, this should run after win_toolchain, as it may use it.
'name': 'clang',
@@ -2238,7 +2477,9 @@ hooks = [
{
'name': 'clang_format_win',
'pattern': '.',
- 'action': [ 'download_from_google_storage',
+ 'condition': 'host_os == "win"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=win32',
'--no_auth',
@@ -2247,21 +2488,38 @@ hooks = [
],
},
{
- 'name': 'clang_format_mac',
+ 'name': 'clang_format_mac_x64',
'pattern': '.',
- 'action': [ 'download_from_google_storage',
+ 'condition': 'host_os == "mac" and host_cpu == "x64"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=darwin',
'--no_auth',
'--bucket', 'chromium-clang-format',
- '-s', 'src/buildtools/mac/clang-format.sha1',
+ '-s', 'src/buildtools/mac/clang-format.x64.sha1',
+ '-o', 'src/buildtools/mac/clang-format',
],
},
{
+ 'name': 'clang_format_mac_arm64',
+ 'pattern': '.',
+ 'condition': 'host_os == "mac" and host_cpu == "arm64"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-clang-format',
+ '-s', 'src/buildtools/mac/clang-format.arm64.sha1',
+ '-o', 'src/buildtools/mac/clang-format',
+ ],
+ },
+ {
'name': 'clang_format_linux',
'pattern': '.',
'condition': 'host_os == "linux"',
- 'action': [ 'download_from_google_storage',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=linux*',
'--no_auth',
@@ -2304,18 +2562,6 @@ hooks = [
],
},
{
- # We used to use src as a CIPD root. We moved it to a different directory
- # in crrev.com/c/930178 but left the clobber here to ensure that that CL
- # could be reverted safely. This can be safely removed once crbug.com/794764
- # is resolved.
- 'name': 'Android Clobber Deprecated CIPD Root',
- 'pattern': '.',
- 'condition': 'checkout_android',
- 'action': ['src/build/cipd/clobber_cipd_root.py',
- '--root', 'src',
- ],
- },
- {
'name': 'Generate component metadata for tests',
'pattern': '.',
'action': [
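The hook entries rewritten above all share the gclient hook schema: after a sync, gclient runs every hook whose 'condition' evaluates true, executing 'action' as an argv list. The recurring change in these hunks swaps the bare 'download_from_google_storage' depot_tools wrapper for an explicit 'python3' invocation of the script. A sketch assembled from the clang_format_mac_x64 hook shown above; the inline comments are explanatory and not part of DEPS:

example_hook = {
    'name': 'clang_format_mac_x64',
    'pattern': '.',                       # '.' matches any changed path
    'condition': 'host_os == "mac" and host_cpu == "x64"',
    'action': [                           # run as a subprocess after sync
        'python3',
        'src/third_party/depot_tools/download_from_google_storage.py',
        '--no_resume',
        '--platform=darwin',
        '--no_auth',
        '--bucket', 'chromium-clang-format',
        '-s', 'src/buildtools/mac/clang-format.x64.sha1',
        '-o', 'src/buildtools/mac/clang-format',
    ],
}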
diff --git a/files/DIR_METADATA b/DIR_METADATA
index 8bc04f15..8bc04f15 100644
--- a/files/DIR_METADATA
+++ b/DIR_METADATA
diff --git a/LICENSE b/LICENSE
index da40b336..c911747a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2011, Google Inc. All rights reserved.
+Copyright 2011 The LibYuv Project Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
diff --git a/METADATA b/METADATA
index bff062d8..19d0436e 100644
--- a/METADATA
+++ b/METADATA
@@ -1,14 +1,19 @@
-name: "libyuv"
-description:
- "libyuv is an open source project that includes YUV scaling and conversion "
- "functionality."
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update libyuv
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
+name: "libyuv"
+description: "libyuv is an open source project that includes YUV scaling and conversion functionality."
third_party {
- url {
- type: GIT
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2024
+ month: 1
+ day: 11
+ }
+ identifier {
+ type: "Git"
value: "https://chromium.googlesource.com/libyuv/libyuv/"
+ version: "af6ac8265bbd07bcf977526458b60305c4304288"
}
- version: "d53f1beecdd8d959f7a3f2e19bd0bd7e7227a233"
- last_upgrade_date { year: 2022 month: 8 day: 5 }
- license_type: NOTICE
}
diff --git a/OWNERS b/OWNERS
index a607e727..f11a7bfd 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,4 +1,11 @@
-fbarchard@google.com
-phoglund@google.com
-magjed@google.com
-chz@google.com
+mbonadei@chromium.org
+fbarchard@chromium.org
+magjed@chromium.org
+wtc@google.com
+jansson@google.com
+
+per-file *.gn=mbonadei@chromium.org,jansson@google.com
+per-file .gitignore=*
+per-file AUTHORS=*
+per-file DEPS=*
+per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com
diff --git a/OWNERS.android b/OWNERS.android
new file mode 100644
index 00000000..7529cb92
--- /dev/null
+++ b/OWNERS.android
@@ -0,0 +1 @@
+include platform/system/core:/janitors/OWNERS
diff --git a/files/PATENTS b/PATENTS
index 64aa5c90..64aa5c90 100644
--- a/files/PATENTS
+++ b/PATENTS
diff --git a/files/PRESUBMIT.py b/PRESUBMIT.py
index d3901caf..d3901caf 100644
--- a/files/PRESUBMIT.py
+++ b/PRESUBMIT.py
diff --git a/files/README.chromium b/README.chromium
index 3f68e21e..1389f285 100644
--- a/files/README.chromium
+++ b/README.chromium
@@ -1,8 +1,9 @@
Name: libyuv
-URL: http://code.google.com/p/libyuv/
-Version: 1837
+URL: https://chromium.googlesource.com/libyuv/libyuv/
+Version: 1883
License: BSD
License File: LICENSE
+Shipped: yes
Description:
libyuv is an open source project that includes YUV conversion and scaling functionality.
diff --git a/files/README.md b/README.md
index db70b7f0..95eeb04c 100644
--- a/files/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
* Optimized for SSSE3/AVX2 on x86/x64.
* Optimized for Neon on Arm.
* Optimized for MSA on Mips.
+* Optimized for RVV on RISC-V.
### Development
diff --git a/README.version b/README.version
deleted file mode 100644
index 5deb188e..00000000
--- a/README.version
+++ /dev/null
@@ -1,8 +0,0 @@
-Version: r1837
-BugComponent: 42195
-Owner: lajos
-Local Modifications:
- * Remove files/Android.mk (it messes with the android build system).
- * Remove OWNERS files within files/ and all the subdirectories (except for
- files/fuzz). Having these files breaks repo presubmit hooks since they
- contain non @google.com email addresses.
diff --git a/UPDATING b/UPDATING
deleted file mode 100644
index 2679284c..00000000
--- a/UPDATING
+++ /dev/null
@@ -1,36 +0,0 @@
-To sync the libyuv checkout to an upstream revision, do the following:
-
-These commands are known to work from the external/libyuv directory of the
-Android tree's checkout.
-
-Step 1: Remove the files/ subdirectory.
-
-$ rm -rf files
-
-Step 2: Clone the libyuv repository from upstream.
-
-$ git clone https://chromium.googlesource.com/libyuv/libyuv files
-
-Step 3 (optional): Checkout a specific commit/tag.
-
-$ cd files
-$ git checkout <commit_or_tag>
-$ cd ..
-
-Step 4: Remove files that aren't necessary (Android.mk, .git and OWNERS).
-
-$ rm files/Android.mk
-$ rm -rf files/.git
-$ find files/ -name "OWNERS" | xargs rm
-
-Step 5: Update the version and last_upgrade_date fields in the METADATA file.
-
-Step 6: Update README.version with the version (can be found in
- files/include/libyuv/version.h)
-
-Step 7: If any local modifications are being done, update README.version and
- this file with updated instructions.
-
-Step 8: Ensure that libyuv builds and camera and media related CTS tests are
- passing. If there are any linker errors about missing symbols, try
- updating frameworks/av/media/libstagefright/export.lds.
diff --git a/files/build_overrides/build.gni b/build_overrides/build.gni
index c8490313..d9d01d51 100644
--- a/files/build_overrides/build.gni
+++ b/build_overrides/build.gni
@@ -13,6 +13,9 @@ build_with_chromium = false
# Some non-Chromium builds don't support building java targets.
enable_java_templates = true
+# Enables assertions on safety checks in libc++.
+enable_safe_libcxx = true
+
# Allow using custom suppressions files (currently not used by libyuv).
asan_suppressions_file = "//build/sanitizers/asan_suppressions.cc"
lsan_suppressions_file = "//build/sanitizers/lsan_suppressions.cc"
diff --git a/files/build_overrides/gtest.gni b/build_overrides/gtest.gni
index d3c3f68c..d3c3f68c 100644
--- a/files/build_overrides/gtest.gni
+++ b/build_overrides/gtest.gni
diff --git a/build_overrides/partition_alloc.gni b/build_overrides/partition_alloc.gni
new file mode 100644
index 00000000..dcf8ac2d
--- /dev/null
+++ b/build_overrides/partition_alloc.gni
@@ -0,0 +1,17 @@
+# Copyright 2022 The LibYuv Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+# Use default values for PartitionAlloc as a standalone library, from
+# base/allocator/partition_allocator/build_overrides/partition_alloc.gni
+use_partition_alloc_as_malloc_default = false
+use_allocator_shim_default = false
+enable_backup_ref_ptr_support_default = false
+enable_mte_checked_ptr_support_default = false
+put_ref_count_in_previous_slot_default = false
+enable_backup_ref_ptr_slow_checks_default = false
+enable_dangling_raw_ptr_checks_default = false
diff --git a/files/cleanup_links.py b/cleanup_links.py
index 7d1eba9b..7d1eba9b 100755
--- a/files/cleanup_links.py
+++ b/cleanup_links.py
diff --git a/codereview.settings b/codereview.settings
index 9782886f..b226fae5 100644
--- a/codereview.settings
+++ b/codereview.settings
@@ -1,5 +1,5 @@
-# This file is used by git cl to get repository specific information.
+# This file is used by `git cl` to get repository specific information.
+CODE_REVIEW_SERVER: codereview.chromium.org
GERRIT_HOST: True
PROJECT: libyuv
-TRY_ON_UPLOAD: False
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
diff --git a/files/docs/deprecated_builds.md b/docs/deprecated_builds.md
index ba42966c..8edefd78 100644
--- a/files/docs/deprecated_builds.md
+++ b/docs/deprecated_builds.md
@@ -165,11 +165,11 @@ mipsel
arm32 disassembly:
- third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
+ llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o
arm64 disassembly:
- third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
+ llvm-objdump -d out/Release/obj/source/libyuv.row_neon64.o
Running tests:
diff --git a/files/docs/environment_variables.md b/docs/environment_variables.md
index dd5d59fb..4eb09659 100644
--- a/files/docs/environment_variables.md
+++ b/docs/environment_variables.md
@@ -40,6 +40,9 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_LSX
LIBYUV_DISABLE_LASX
+## RISCV CPUs
+ LIBYUV_DISABLE_RVV
+
# Test Width/Height/Repeat
The unittests default to a small image (128x72) to run fast. This can be set by environment variable to test specific resolutions.
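As an aside on the LIBYUV_DISABLE_* switches above: such toggles are conventionally honored once at CPU-detection time, with the variable's mere presence acting as a disable. A minimal C sketch with a hypothetical helper name (the real check lives in source/cpu_id.cc):

    #include <stdlib.h>

    /* Hypothetical helper, not libyuv's actual API: the variable's
       presence alone disables the corresponding SIMD path. */
    static int EnvDisabled(const char* name) {
      return getenv(name) != NULL;
    }

    /* Example: mask out the RISC-V Vector kernels when requested. */
    int use_rvv = !EnvDisabled("LIBYUV_DISABLE_RVV");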
diff --git a/files/docs/filtering.md b/docs/filtering.md
index 8696976e..8696976e 100644
--- a/files/docs/filtering.md
+++ b/docs/filtering.md
diff --git a/files/docs/formats.md b/docs/formats.md
index 12ea9465..12ea9465 100644
--- a/files/docs/formats.md
+++ b/docs/formats.md
diff --git a/files/docs/getting_started.md b/docs/getting_started.md
index 15b19ab2..f2f71b8b 100644
--- a/files/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -139,11 +139,11 @@ mips
arm disassembly:
- third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
+ llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
- third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
+ llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
- third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
+ llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
Caveat: Disassembly may require optimize_max be disabled in BUILD.gn
@@ -220,6 +220,47 @@ Install cmake: http://www.cmake.org/
make -j4
make package
+## Building RISC-V target with cmake
+
+### Prerequisite: build risc-v clang toolchain and qemu
+
+If you don't have a prebuilt clang and riscv64 qemu, run the following script to download the sources and build them.
+
+ ./riscv_script/prepare_toolchain_qemu.sh
+
+After running the script, clang & qemu are built in `build-toolchain-qemu/riscv-clang/` & `build-toolchain-qemu/riscv-qemu/`.
+
+### Cross-compile for RISC-V target
+ cmake -B out/Release/ -DUNIT_TEST=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \
+ -DTOOLCHAIN_PATH={TOOLCHAIN_PATH} \
+ -DUSE_RVV=ON .
+ cmake --build out/Release/
+
+#### Customized Compiler Flags
+
+Customized compiler flags are supported by `-DRISCV_COMPILER_FLAGS="xxx"`.
+If `-DRISCV_COMPILER_FLAGS="xxx"` is assigned manually, the other compile flags (e.g. `-march=xxx`) will not be appended.
+
+Example:
+
+ cmake -B out/Release/ -DUNIT_TEST=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \
+ -DRISCV_COMPILER_FLAGS="-mcpu=sifive-x280" \
+ .
+
+### Run on QEMU
+
+#### Run libyuv_unittest on QEMU
+ cd out/Release/
+ USE_RVV=ON \
+ TOOLCHAIN_PATH={TOOLCHAIN_PATH} \
+ QEMU_PREFIX_PATH={QEMU_PREFIX_PATH} \
+ ../../riscv_script/run_qemu.sh libyuv_unittest
+
+
## Setup for Arm Cross compile
See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html
diff --git a/files/docs/rotation.md b/docs/rotation.md
index a08430fd..a08430fd 100644
--- a/files/docs/rotation.md
+++ b/docs/rotation.md
diff --git a/files/download_vs_toolchain.py b/download_vs_toolchain.py
index 6bc086d6..6bc086d6 100644
--- a/files/download_vs_toolchain.py
+++ b/download_vs_toolchain.py
diff --git a/files/Android.bp b/files/Android.bp
deleted file mode 100644
index 36156287..00000000
--- a/files/Android.bp
+++ /dev/null
@@ -1,196 +0,0 @@
-package {
- default_applicable_licenses: ["external_libyuv_files_license"],
-}
-
-// Added automatically by a large-scale-change
-//
-// large-scale-change included anything that looked like it might be a license
-// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
-//
-// Please consider removing redundant or irrelevant files from 'license_text:'.
-// See: http://go/android-license-faq
-license {
- name: "external_libyuv_files_license",
- visibility: [":__subpackages__"],
- license_kinds: [
- "SPDX-license-identifier-BSD",
- ],
- license_text: [
- "LICENSE",
- "PATENTS",
- ],
-}
-
-cc_library {
- name: "libyuv",
- vendor_available: true,
- product_available: true,
- host_supported: true,
- vndk: {
- enabled: true,
- },
-
- srcs: [
- "source/compare.cc",
- "source/compare_common.cc",
- "source/compare_gcc.cc",
- "source/compare_msa.cc",
- "source/compare_neon.cc",
- "source/compare_neon64.cc",
- "source/convert.cc",
- "source/convert_argb.cc",
- "source/convert_from.cc",
- "source/convert_from_argb.cc",
- "source/convert_jpeg.cc",
- "source/convert_to_argb.cc",
- "source/convert_to_i420.cc",
- "source/cpu_id.cc",
- "source/mjpeg_decoder.cc",
- "source/mjpeg_validate.cc",
- "source/planar_functions.cc",
- "source/rotate.cc",
- "source/rotate_any.cc",
- "source/rotate_argb.cc",
- "source/rotate_common.cc",
- "source/rotate_gcc.cc",
- "source/rotate_msa.cc",
- "source/rotate_neon.cc",
- "source/rotate_neon64.cc",
- "source/row_any.cc",
- "source/row_common.cc",
- "source/row_gcc.cc",
- "source/row_msa.cc",
- "source/row_neon.cc",
- "source/row_neon64.cc",
- "source/scale.cc",
- "source/scale_any.cc",
- "source/scale_argb.cc",
- "source/scale_common.cc",
- "source/scale_gcc.cc",
- "source/scale_msa.cc",
- "source/scale_neon.cc",
- "source/scale_neon64.cc",
- "source/scale_rgb.cc",
- "source/scale_uv.cc",
- "source/video_common.cc",
- ],
-
- cflags: [
- "-Wall",
- "-Werror",
- "-Wno-unused-parameter",
- "-fexceptions",
- "-DHAVE_JPEG",
- ],
-
- arch: {
- arm: {
- cflags: ["-mfpu=neon"],
- },
- },
-
- shared_libs: ["libjpeg"],
-
- export_include_dirs: ["include"],
-
- apex_available: [
- "//apex_available:platform",
- "com.android.media.swcodec",
- ],
- min_sdk_version: "29",
-}
-
-// compatibility static library until all uses of libyuv_static are replaced
-// with libyuv (b/37646797)
-cc_library_static {
- name: "libyuv_static",
- vendor_available: true,
- whole_static_libs: ["libyuv"],
- apex_available: [
- "//apex_available:platform",
- "com.android.media.swcodec",
- ],
- min_sdk_version: "29",
-}
-
-cc_test {
- name: "libyuv_unittest",
- static_libs: ["libyuv"],
- shared_libs: ["libjpeg"],
- cflags: ["-Wall", "-Werror"],
- srcs: [
- "unit_test/basictypes_test.cc",
- "unit_test/color_test.cc",
- "unit_test/compare_test.cc",
- "unit_test/convert_test.cc",
- "unit_test/cpu_test.cc",
- "unit_test/cpu_thread_test.cc",
- "unit_test/math_test.cc",
- "unit_test/planar_test.cc",
- "unit_test/rotate_argb_test.cc",
- "unit_test/rotate_test.cc",
- "unit_test/scale_argb_test.cc",
- "unit_test/scale_rgb_test.cc",
- "unit_test/scale_test.cc",
- "unit_test/scale_uv_test.cc",
- "unit_test/unit_test.cc",
- "unit_test/video_common_test.cc",
- ],
-}
-
-cc_test {
- name: "compare",
- gtest: false,
- srcs: [
- "util/compare.cc",
- ],
- static_libs: ["libyuv"],
-}
-
-cc_test {
- name: "cpuid",
- gtest: false,
- srcs: [
- "util/cpuid.c",
- ],
- static_libs: ["libyuv"],
-}
-
-cc_test {
- name: "i444tonv12_eg",
- gtest: false,
- srcs: [
- "util/i444tonv12_eg.cc",
- ],
- static_libs: ["libyuv"],
-}
-
-cc_test {
- name: "psnr",
- gtest: false,
- srcs: [
- "util/psnr_main.cc",
- "util/psnr.cc",
- "util/ssim.cc",
- ],
- static_libs: ["libyuv"],
-}
-
-cc_test {
- name: "yuvconstants",
- gtest: false,
- srcs: [
- "util/yuvconstants.c",
- ],
- static_libs: ["libyuv"],
-}
-
-cc_test {
- name: "yuvconvert",
- gtest: false,
- srcs: [
- "util/yuvconvert.cc",
- ],
- static_libs: ["libyuv"],
- shared_libs: ["libjpeg"],
-}
diff --git a/files/LICENSE b/files/LICENSE
deleted file mode 100644
index c911747a..00000000
--- a/files/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright 2011 The LibYuv Project Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- * Neither the name of Google nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/files/codereview.settings b/files/codereview.settings
deleted file mode 100644
index b226fae5..00000000
--- a/files/codereview.settings
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is used by `git cl` to get repository specific information.
-CODE_REVIEW_SERVER: codereview.chromium.org
-GERRIT_HOST: True
-PROJECT: libyuv
-VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
diff --git a/files/public.mk b/files/public.mk
deleted file mode 100644
index 1342307a..00000000
--- a/files/public.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-# This file contains all the common make variables which are useful for
-# anyone depending on this library.
-# Note that dependencies on NDK are not directly listed since NDK auto adds
-# them.
-
-LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
-
-LIBYUV_C_FLAGS :=
-
-LIBYUV_CPP_FLAGS :=
-
-LIBYUV_LDLIBS :=
-LIBYUV_DEP_MODULES :=
diff --git a/files/source/compare_mmi.cc b/files/source/compare_mmi.cc
deleted file mode 100644
index 7640d946..00000000
--- a/files/source/compare_mmi.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#include "libyuv/compare_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// Hakmem method for hamming distance.
-uint32_t HammingDistance_MMI(const uint8_t* src_a,
- const uint8_t* src_b,
- int count) {
- uint32_t diff = 0u;
-
- uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
- uint64_t c1 = 0x5555555555555555;
- uint64_t c2 = 0x3333333333333333;
- uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
- uint32_t c4 = 0x01010101;
- uint64_t s1 = 1, s2 = 2, s3 = 4;
- __asm__ volatile(
- "1: \n\t"
- "ldc1 %[ta], 0(%[src_a]) \n\t"
- "ldc1 %[tb], 0(%[src_b]) \n\t"
- "xor %[temp], %[ta], %[tb] \n\t"
- "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
- "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
- "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
- "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
- "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
- "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
- "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
- "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
- "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
- "and %[temp1], %[temp1], %[c3] \n\t" //&c3
- "dmfc1 $t0, %[temp1] \n\t"
- "dsrl32 $t0, $t0, 0 \n\t "
- "mul $t0, $t0, %[c4] \n\t"
- "dsrl $t0, $t0, 24 \n\t"
- "dadd %[diff], %[diff], $t0 \n\t"
- "dmfc1 $t0, %[temp1] \n\t"
- "mul $t0, $t0, %[c4] \n\t"
- "dsrl $t0, $t0, 24 \n\t"
- "dadd %[diff], %[diff], $t0 \n\t"
- "daddiu %[src_a], %[src_a], 8 \n\t"
- "daddiu %[src_b], %[src_b], 8 \n\t"
- "addiu %[count], %[count], -8 \n\t"
- "bgtz %[count], 1b \n\t"
- "nop \n\t"
- : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
- [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
- [temp1] "+f"(temp1)
- : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
- [s2] "f"(s2), [s3] "f"(s3)
- : "memory");
- return diff;
-}
-
-uint32_t SumSquareError_MMI(const uint8_t* src_a,
- const uint8_t* src_b,
- int count) {
- uint32_t sse = 0u;
- uint32_t sse_hi = 0u, sse_lo = 0u;
-
- uint64_t src1, src2;
- uint64_t diff, diff_hi, diff_lo;
- uint64_t sse_sum, sse_tmp;
-
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
-
- "1: \n\t"
- "ldc1 %[src1], 0x00(%[src_a]) \n\t"
- "ldc1 %[src2], 0x00(%[src_b]) \n\t"
- "pasubub %[diff], %[src1], %[src2] \n\t"
- "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
- "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
- "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
- "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
- "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
- "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
-
- "daddiu %[src_a], %[src_a], 0x08 \n\t"
- "daddiu %[src_b], %[src_b], 0x08 \n\t"
- "daddiu %[count], %[count], -0x08 \n\t"
- "bnez %[count], 1b \n\t"
-
- "mfc1 %[sse_lo], %[sse_sum] \n\t"
- "mfhc1 %[sse_hi], %[sse_sum] \n\t"
- "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
- : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
- [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
- [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
- [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
- : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
- [mask] "f"(mask)
- : "memory");
-
- return sse;
-}
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
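The masks c1/c2/c3 in the deleted HammingDistance_MMI are the classic Hakmem bit-count constants; the kernel XORs 8 bytes at a time and popcounts the result. A portable C sketch of the same arithmetic (like the kernel, it assumes count is a multiple of 8):

    #include <stdint.h>
    #include <string.h>

    /* Hakmem-style popcount of one 64-bit word, using the same three
       mask constants as the MMI kernel above. */
    static uint32_t Popcount64(uint64_t x) {
      x = x - ((x >> 1) & 0x5555555555555555ULL);
      x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
      x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
      return (uint32_t)((x * 0x0101010101010101ULL) >> 56);
    }

    /* Hamming distance = popcount of the XOR, 8 bytes per iteration. */
    uint32_t HammingDistance_Ref(const uint8_t* src_a, const uint8_t* src_b,
                                 int count) {
      uint32_t diff = 0u;
      for (int i = 0; i < count; i += 8) {
        uint64_t a, b;
        memcpy(&a, src_a + i, 8);
        memcpy(&b, src_b + i, 8);
        diff += Popcount64(a ^ b);
      }
      return diff;
    }

    /* SumSquareError_MMI likewise reduces to a plain sum of squared
       byte differences: */
    uint32_t SumSquareError_Ref(const uint8_t* src_a, const uint8_t* src_b,
                                int count) {
      uint32_t sse = 0u;
      for (int i = 0; i < count; ++i) {
        int d = (int)src_a[i] - (int)src_b[i];
        sse += (uint32_t)(d * d);
      }
      return sse;
    }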
diff --git a/files/source/rotate_common.cc b/files/source/rotate_common.cc
deleted file mode 100644
index ff212ade..00000000
--- a/files/source/rotate_common.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate_row.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-void TransposeWx8_C(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst[0] = src[0 * src_stride];
- dst[1] = src[1 * src_stride];
- dst[2] = src[2 * src_stride];
- dst[3] = src[3 * src_stride];
- dst[4] = src[4 * src_stride];
- dst[5] = src[5 * src_stride];
- dst[6] = src[6 * src_stride];
- dst[7] = src[7 * src_stride];
- ++src;
- dst += dst_stride;
- }
-}
-
-void TransposeUVWx8_C(const uint8_t* src,
- int src_stride,
- uint8_t* dst_a,
- int dst_stride_a,
- uint8_t* dst_b,
- int dst_stride_b,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst_a[0] = src[0 * src_stride + 0];
- dst_b[0] = src[0 * src_stride + 1];
- dst_a[1] = src[1 * src_stride + 0];
- dst_b[1] = src[1 * src_stride + 1];
- dst_a[2] = src[2 * src_stride + 0];
- dst_b[2] = src[2 * src_stride + 1];
- dst_a[3] = src[3 * src_stride + 0];
- dst_b[3] = src[3 * src_stride + 1];
- dst_a[4] = src[4 * src_stride + 0];
- dst_b[4] = src[4 * src_stride + 1];
- dst_a[5] = src[5 * src_stride + 0];
- dst_b[5] = src[5 * src_stride + 1];
- dst_a[6] = src[6 * src_stride + 0];
- dst_b[6] = src[6 * src_stride + 1];
- dst_a[7] = src[7 * src_stride + 0];
- dst_b[7] = src[7 * src_stride + 1];
- src += 2;
- dst_a += dst_stride_a;
- dst_b += dst_stride_b;
- }
-}
-
-void TransposeWxH_C(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride,
- int width,
- int height) {
- int i;
- for (i = 0; i < width; ++i) {
- int j;
- for (j = 0; j < height; ++j) {
- dst[i * dst_stride + j] = src[j * src_stride + i];
- }
- }
-}
-
-void TransposeUVWxH_C(const uint8_t* src,
- int src_stride,
- uint8_t* dst_a,
- int dst_stride_a,
- uint8_t* dst_b,
- int dst_stride_b,
- int width,
- int height) {
- int i;
- for (i = 0; i < width * 2; i += 2) {
- int j;
- for (j = 0; j < height; ++j) {
- dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
- dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
- }
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
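These scalar transposers survive upstream unchanged; for orientation, here is a sketch (hypothetical driver name) of how the Wx8 kernel tiles a full plane transpose, with TransposeWxH_C mopping up the remainder rows:

    #include <stdint.h>

    void TransposePlane_Sketch(const uint8_t* src, int src_stride,
                               uint8_t* dst, int dst_stride,
                               int width, int height) {
      int i = 0;
      /* Each call turns 8 source rows into 8 destination columns,
         starting at column i. */
      for (; i + 8 <= height; i += 8) {
        TransposeWx8_C(src + i * src_stride, src_stride,
                       dst + i, dst_stride, width);
      }
      if (i < height) {  /* Remaining height % 8 rows. */
        TransposeWxH_C(src + i * src_stride, src_stride,
                       dst + i, dst_stride, width, height - i);
      }
    }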
diff --git a/files/source/rotate_mmi.cc b/files/source/rotate_mmi.cc
deleted file mode 100644
index f8de6083..00000000
--- a/files/source/rotate_mmi.cc
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate_row.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-void TransposeWx8_MMI(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride,
- int width) {
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
- uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
- uint8_t* src_tmp = nullptr;
-
- __asm__ volatile(
- "1: \n\t"
- "ldc1 %[tmp12], 0x00(%[src]) \n\t"
- "dadd %[src_tmp], %[src], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (00 10 01 11 02 12 03 13) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (04 14 05 15 06 16 07 17) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (20 30 21 31 22 32 23 33) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (24 34 25 35 26 36 27 37) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp4 = (00 10 20 30 01 11 21 31) */
- "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
- /* tmp5 = (02 12 22 32 03 13 23 33) */
- "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
- /* tmp6 = (04 14 24 34 05 15 25 35) */
- "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
- /* tmp7 = (06 16 26 36 07 17 27 37) */
- "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (40 50 41 51 42 52 43 53) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (44 54 45 55 46 56 47 57) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (60 70 61 71 62 72 63 73) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (64 74 65 75 66 76 67 77) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp8 = (40 50 60 70 41 51 61 71) */
- "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
- /* tmp9 = (42 52 62 72 43 53 63 73) */
- "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
- /* tmp10 = (44 54 64 74 45 55 65 75) */
- "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
- /* tmp11 = (46 56 66 76 47 57 67 77) */
- "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"
-
- /* tmp0 = (00 10 20 30 40 50 60 70) */
- "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
- /* tmp1 = (01 11 21 31 41 51 61 71) */
- "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (02 12 22 32 42 52 62 72) */
- "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
- /* tmp1 = (03 13 23 33 43 53 63 73) */
- "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (04 14 24 34 44 54 64 74) */
- "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
- /* tmp1 = (05 15 25 35 45 55 65 75) */
- "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (06 16 26 36 46 56 66 76) */
- "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
- /* tmp1 = (07 17 27 37 47 57 67 77) */
- "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "daddi %[src], %[src], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
- [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
- [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
- [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst),
- [src_tmp] "+&r"(src_tmp)
- : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride),
- [dst_stride] "r"(dst_stride)
- : "memory");
-}
-
-void TransposeUVWx8_MMI(const uint8_t* src,
- int src_stride,
- uint8_t* dst_a,
- int dst_stride_a,
- uint8_t* dst_b,
- int dst_stride_b,
- int width) {
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
- uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
- uint8_t* src_tmp = nullptr;
-
- __asm__ volatile(
- "1: \n\t"
- /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */
- "ldc1 %[tmp12], 0x00(%[src]) \n\t"
- "dadd %[src_tmp], %[src], %[src_stride] \n\t"
- /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */
- "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
- /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */
- "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
- /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */
- "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
- /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */
- "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */
- "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
- /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */
- "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
- /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */
- "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
- /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */
- "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"
-
- /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */
- "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
- /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */
- "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */
- "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
- /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */
- "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */
- "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
- /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */
- "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */
- "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
- /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */
- "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "daddiu %[src], %[src], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
- [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
- [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
- [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a),
- [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp)
- : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a),
- [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride)
- : "memory");
-}
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/row_mmi.cc b/files/source/row_mmi.cc
deleted file mode 100644
index 362fd1cf..00000000
--- a/files/source/row_mmi.cc
+++ /dev/null
@@ -1,7842 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "libyuv/row.h"
-
-#include <string.h> // For memcpy and memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// clang-format off
-
-void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask = 0xff000000ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [mask] "f"(mask)
- : "memory");
-}
-
-void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0xff000000ULL;
- const uint64_t mask2 = 0xc6;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
- : "memory");
-}
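The pshufh immediate 0xc6 (binary 11 00 01 10) reads lanes 2,1,0,3: it swaps R and B while keeping G and the alpha lane already forced to 0xff by mask1. Per pixel, the deleted kernel amounts to:

    #include <stdint.h>

    /* RAW (R,G,B memory order) to little-endian ARGB (B,G,R,A bytes):
       swap R and B, append opaque alpha. */
    static void RAWToARGBPixel(const uint8_t* raw, uint8_t dst[4]) {
      dst[0] = raw[2];  /* B */
      dst[1] = raw[1];  /* G */
      dst[2] = raw[0];  /* R */
      dst[3] = 0xff;    /* A */
    }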
-
-void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
- uint64_t mask0 = 0xc6;
- uint64_t mask1 = 0x6c;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
- "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
- "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
-
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
- "punpcklbh %[src1], %[src1], %[zero] \n\t"
- "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
- "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
- "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "pextrh %[ftmp2], %[src1], %[zero] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "packushb %[src1], %[src1], %[zero] \n\t"
-
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
-
- "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
- "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
- : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
- [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
- : "memory");
-}
-
-void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[5];
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[c1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
- : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
- [four] "f"(0x04)
- : "memory");
-}
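Each shift-and-or pair above widens a channel by replicating its top bits into the vacated low bits: (x << 3) | (x >> 2) for the 5-bit channels and (x << 2) | (x >> 4) for 6-bit green, which maps 0 to 0 and 31 (or 63) to 255 exactly. A one-pixel scalar sketch:

    #include <stdint.h>

    /* Expand one little-endian RGB565 pixel to B,G,R,A bytes. */
    static void RGB565ToARGBPixel(uint16_t p, uint8_t dst[4]) {
      uint8_t b = (uint8_t)(p & 0x1f);
      uint8_t g = (uint8_t)((p >> 5) & 0x3f);
      uint8_t r = (uint8_t)(p >> 11);
      dst[0] = (uint8_t)((b << 3) | (b >> 2));
      dst[1] = (uint8_t)((g << 2) | (g >> 4));
      dst[2] = (uint8_t)((r << 3) | (r >> 2));
      dst[3] = 0xff;  /* RGB565 has no alpha; force opaque. */
    }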
-
-void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[6];
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- uint64_t c4 = 0x0001000100010001;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psrlh %[a], %[src1], %[seven] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "xor %[a], %[a], %[c1] \n\t"
- "paddb %[a], %[a], %[c4] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[a] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),
- [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
- : "memory");
-}
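The xor/paddb pair on the alpha lane is a bytewise two's-complement negate: with the alpha bit a in {0,1}, (a ^ 0xff) + 1 ≡ -a (mod 256), broadcasting the bit to 0x00 or 0xff. The whole pixel, in scalar form:

    #include <stdint.h>

    /* Expand one ARGB1555 pixel: 5-bit channels widen by bit
       replication, the 1-bit alpha broadcasts to 0x00 or 0xff. */
    static void ARGB1555ToARGBPixel(uint16_t p, uint8_t dst[4]) {
      uint8_t b = (uint8_t)(p & 0x1f);
      uint8_t g = (uint8_t)((p >> 5) & 0x1f);
      uint8_t r = (uint8_t)((p >> 10) & 0x1f);
      uint8_t a = (uint8_t)(p >> 15);
      dst[0] = (uint8_t)((b << 3) | (b >> 2));
      dst[1] = (uint8_t)((g << 3) | (g >> 2));
      dst[2] = (uint8_t)((r << 3) | (r >> 2));
      dst[3] = (uint8_t)(0u - a);  /* -a mod 256: 0x00 or 0xff */
    }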
-
-void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[6];
- uint64_t c0 = 0x000f000f000f000f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psrlh %[a], %[src1], %[four] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "psllh %[src0], %[a], %[four] \n\t"
- "or %[a], %[src0], %[a] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[a] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),
- [four] "f"(0x04)
- : "memory");
-}
-
-void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)
- : "memory");
-}
-
-void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
- uint64_t mask0 = 0xc6;
- uint64_t mask1 = 0x18;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
- "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
-
- "pextrh %[src0], %[ftmp1], %[two] \n\t"
- "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"
- "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"
-
- "pextrh %[src0], %[ftmp2], %[two] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"
- "pextrh %[src0], %[ftmp2], %[one] \n\t"
- "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"
- "pextrh %[src0], %[ftmp2], %[zero] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "pinsrh_0 %[src1], %[src1], %[src0] \n\t"
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "packushb %[src1], %[src1], %[zero] \n\t"
-
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
- [one] "f"(0x01), [two] "f"(0x02)
- : "memory");
-}
-
-void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[two] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[eleven] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),
- [eleven] "f"(0x0b)
- : "memory");
-}
-
-// dither4 is a row of 4 values from a 4x4 dither matrix.
-// The 4x4 matrix contains values to increase RGB.  When converting to
-// fewer bits (565) this provides an ordered dither.
-// The first byte of the int is the upper-left entry of the 4x4 matrix.
-// The 4 values are passed as an int, then referenced as an array, so
-// endianness will not affect the order of the original matrix.  But dither4
-// will contain the first pixel in the lower byte for little endian
-// or the upper byte for big endian.
-void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- const uint32_t dither4,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
- uint64_t c0 = 0x00ff00ff00ff00ff;
-
- __asm__ volatile(
- "punpcklbh %[dither], %[dither], %[zero] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
-
- "paddh %[b], %[b], %[dither] \n\t"
- "paddh %[g], %[g], %[dither] \n\t"
- "paddh %[r], %[r], %[dither] \n\t"
- "pcmpgth %[src0], %[b], %[c0] \n\t"
- "or %[src0], %[src0], %[b] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "pcmpgth %[src0], %[g], %[c0] \n\t"
- "or %[src0], %[src0], %[g] \n\t"
- "and %[g], %[src0], %[c0] \n\t"
- "pcmpgth %[src0], %[r], %[c0] \n\t"
- "or %[src0], %[src0], %[r] \n\t"
- "and %[r], %[src0], %[c0] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[two] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[eleven] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
- [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
- : "memory");
-}
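Following the comment above: each pixel in a group of four receives the dither byte for its column, the sum saturates at 255 (the pcmpgth/or/and sequence), and only then are the channels truncated to 5/6/5 bits. A scalar sketch, assuming little-endian dither4 as the comment describes:

    #include <stdint.h>

    static uint8_t Clamp255(int v) { return (uint8_t)(v > 255 ? 255 : v); }

    void ARGBToRGB565DitherRow_Sketch(const uint8_t* src_argb, uint8_t* dst_rgb,
                                      uint32_t dither4, int width) {
      for (int x = 0; x < width; ++x) {
        int d = (int)((dither4 >> ((x & 3) * 8)) & 0xff);  /* column value */
        int b = Clamp255(src_argb[0] + d) >> 3;
        int g = Clamp255(src_argb[1] + d) >> 2;
        int r = Clamp255(src_argb[2] + d) >> 3;
        dst_rgb[0] = (uint8_t)(b | (g << 5));         /* low byte  */
        dst_rgb[1] = (uint8_t)((g >> 3) | (r << 3));  /* high byte */
        src_argb += 4;
        dst_rgb += 2;
      }
    }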
-
-void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
- "punpckhbh %[a], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[three] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
- "psrlh %[a], %[a], %[seven] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[ten] \n\t"
- "psllh %[a], %[a], %[fifteen] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),
- [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
- : "memory");
-}
-
-void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
- "punpckhbh %[a], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[four] \n\t"
- "psrlh %[g], %[g], %[four] \n\t"
- "psrlh %[r], %[r], %[four] \n\t"
- "psrlh %[a], %[a], %[four] \n\t"
-
- "psllh %[g], %[g], %[four] \n\t"
- "psllh %[r], %[r], %[eight] \n\t"
- "psllh %[a], %[a], %[twelve] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),
- [twelve] "f"(0x0c)
- : "memory");
-}
-
-void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001004200810019;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
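/* For reference (illustrative, not from this file): the mask above packs the
 * pmaddhw coefficients {25, 129, 66, 1} into halfword lanes to match the
 * B,G,R,A byte order, and pinsrh_3 drops the 0x1080 bias into the alpha
 * lane, so each pmaddhw/paddw/psrlw group evaluates the BT.601 studio-range
 * luma used throughout libyuv: */
static uint8_t RGBToY_sketch(uint8_t r, uint8_t g, uint8_t b) {
  /* 0x1080 == 16.5 * 256: the +16 luma offset plus 0.5 for rounding. */
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}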
-
-void ARGBToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
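/* Illustrative scalar equivalent of the chroma loop above (not from this
 * file).  The loop sums each 2x2 block and halves it with rounding, so the
 * lanes hold twice the per-pixel average; the masks therefore carry the
 * chroma coefficients at half scale ({2, 56, 37, 19} for U, {9, 47, 56, 2}
 * for V), and the bias 0x4040, doubled by its lane multiplier of 2, becomes
 * the usual 0x8080: */
static uint8_t RGBToU_sketch(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static uint8_t RGBToV_sketch(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}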
-
-void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0019008100420001;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
-void BGRAToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
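/* Annotation (not from this file): pmaddhw can only add pair products, so
 * every *ToUVRow_MMI variant computes the positive and negative halves of
 * each chroma dot product in separate 32-bit words and recombines them with
 * psubw.  The psubw operand order flips between the ARGB-family version
 * above at dest*_u/dest*_v and this BGRA-family one because the reversed
 * byte order swaps which word holds the bias-plus-positive term. */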
-
-void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
-void ABGRToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
-void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0042008100190001;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
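/* The four *ToYRow_MMI kernels above share one loop body; only the
 * coefficient order and the lane that receives the 0x1080 bias (pinsrh_0 vs
 * pinsrh_3) differ, mirroring each format's in-memory byte order.  An
 * illustrative helper (not from this file) showing how the masks are packed,
 * lane 0 in the low halfword: */
static uint64_t PackYMask_sketch(unsigned c0, unsigned c1, unsigned c2,
                                 unsigned c3) {
  return (uint64_t)c0 | ((uint64_t)c1 << 16) | ((uint64_t)c2 << 32) |
         ((uint64_t)c3 << 48);
}
/* PackYMask_sketch(25, 129, 66, 1) == 0x0001004200810019  (ARGB: B,G,R,A)
 * PackYMask_sketch(1, 66, 129, 25) == 0x0019008100420001  (BGRA: A,R,G,B)
 * PackYMask_sketch(66, 129, 25, 1) == 0x0001001900810042  (ABGR: R,G,B,A)
 * PackYMask_sketch(1, 25, 129, 66) == 0x0042008100190001  (RGBA: A,B,G,R) */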
-
-void RGBAToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
- "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
- "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
- "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
- "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
-void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001004200810019;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
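/* Illustrative scalar equivalent (not from this file).  RGB24 pixels are 3
 * bytes apart, which is why the loop above shifts each loaded doubleword
 * left by 8 bits so the high unpack lands on the second pixel's B,G,R, and
 * why src advances by 0x18 (24 bytes == 8 pixels) per iteration: */
static void RGB24ToYRow_C_sketch(const uint8_t* src_rgb24,
                                 uint8_t* dst_y,
                                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_y[x] = (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
    src_rgb24 += 3;
  }
}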
-
-void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
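-// RAWToYRow_MMI: same luma kernel as the RGB24 version but for RAW
-// (R,G,B byte order), so the B and R taps in the coefficient mask are
-// swapped. Per pixel this is the standard studio-range BT.601 luma:
-//   Y = (66*R + 129*G + 25*B + 0x1080) >> 8
-// (0x1080 folds the +16 offset and the >>8 rounding into one constant).
-// Eight pixels (24 bytes) are handled per iteration; the src_argb
-// parameter name is inherited from the ARGB variant but points at RAW
-// data here.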
-void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
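-// RAWToUVRow_MMI: chroma counterpart of RGB24ToUVRow_MMI for R,G,B byte
-// order. The 2x2 averaging pipeline is identical; only the coefficient
-// masks and the dsll/pinsrh placement of the 0x4040 bias are mirrored to
-// match the reversed channel order, and the psubw operand order flips
-// with them, so the output U/V match the RGB24 variant.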
-void RAWToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
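-// ARGBToYJRow_MMI: full-range (JPEG) luma from ARGB (B,G,R,A in memory).
-// pinsrh_3 overwrites the alpha lane with the 0x80 rounding constant, so
-// each pmaddhw/paddw pair reduces one pixel to
-//   YJ = (77*R + 150*G + 29*B + 0x80) >> 8
-// A scalar sketch of the same math (hypothetical helper for reference,
-// not part of this file):
-//   static uint8_t ARGBPixelToYJ(uint8_t b, uint8_t g, uint8_t r) {
-//     return (uint8_t)((77 * r + 150 * g + 29 * b + 0x80) >> 8);
-//   }
-// The loop below vectorizes this over 8 pixels (32 bytes) per iteration.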
-void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest0, dest1, dest2, dest3;
- uint64_t tmp0, tmp1;
- const uint64_t shift = 0x08;
- const uint64_t value = 0x80;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x0001004D0096001DULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest2], %[dest2], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest3], %[dest3], %[shift] \n\t"
-
- "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
- "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
- "packushb %[dest], %[tmp0], %[tmp1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
- [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
- [width] "r"(width)
- : "memory");
-}
-
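-// ARGBToUVJRow_MMI: full-range (JPEG) chroma. Unlike the RGB24/RAW paths,
-// the two rows are combined with pavgh, and half-scale JPEG taps (63, 42,
-// 21 for U; 63, 53, 10 for V) are applied to the doubled 2x2 averages,
-// which effectively gives
-//   UJ = (126*B - 84*G - 42*R + 0x8080) >> 8
-//   VJ = (126*R - 106*G - 20*B + 0x8080) >> 8
-// on the block average. 16 pixels (64 bytes) per iteration.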
-void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0015002a003f0002;
- const uint64_t mask_v = 0x0002003f0035000a;
-
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
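-// RGB565ToYRow_MMI: unpacks 5:6:5 pixels into 8-bit channels by bit
-// replication (b<<3|b>>2, g<<2|g>>4, r<<3|r>>2), then applies the same
-// studio-range luma dot product as the paths above:
-//   Y = (66*R + 129*G + 25*B + 0x1080) >> 8
-// Eight pixels (16 bytes) per iteration.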
-void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
- uint64_t ftmp[11];
- const uint64_t value = 0x1080108010801080;
- const uint64_t mask = 0x0001004200810019;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),
- [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
- : "memory");
-}
-
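-// ARGB1555ToYRow_MMI: 1:5:5:5 variant of the kernel above. The alpha bit
-// is dropped while the c0/c2/c3 masks carve out the three 5-bit fields,
-// each channel is widened with x<<3|x>>2 replication, and the shared
-// BT.601 luma mask and 0x1080 bias finish the job.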
-void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];
- const uint64_t value = 0x1080108010801080;
- const uint64_t mask = 0x0001004200810019;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
- [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
- : "memory");
-}
-
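-// ARGB4444ToYRow_MMI: 4:4:4:4 variant. Each 4-bit channel is widened by
-// nibble duplication ((x<<4)|x), alpha is ignored, and the usual luma
-// mask/bias pair produces Y for 8 pixels (16 bytes) per iteration.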
-void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];
- uint64_t value = 0x1080108010801080;
- uint64_t mask = 0x0001004200810019;
- uint64_t c0 = 0x000f000f000f000f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
- : "memory");
-}
-
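-// RGB565ToUVRow_MMI: chroma from two rows of 5:6:5 pixels. The 2x2 sums
-// are formed in the native 5/6-bit channel widths and rescaled toward
-// 8-bit range with shift-and-or folds before the full-scale BT.601 chroma
-// taps are applied (112/74/38 for U, 112/94/18 for V; the 0x2020 halfwords
-// carry the bias). pshufh with selector 0x93 rotates each halfword group
-// so one packed vector feeds both the U and the V pmaddhw. The leading
-// daddu turns the stride into a second row pointer, reusing (and
-// clobbering) the next_rgb565 input operand.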
-void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
- int src_stride_rgb565,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[13];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest0_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest1_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest2_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest2_v], %[src0], %[c2] \n\t"
- "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
- "or %[dest2_v], %[src1], %[dest2_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest3_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest3_v], %[src0], %[c2] \n\t"
- "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
- "or %[dest3_v], %[src1], %[dest3_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"
- "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
- [one] "f"(0x01)
- : "memory");
-}
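-
-// A rough scalar model of what the RGB565 loop above produces for each 2x2
-// block of source pixels. This and the *_Sketch helpers below are editor
-// illustrations, not libyuv symbols; they assume the <stdint.h> types this
-// file already uses, and the asm only approximates the 5/6-bit to 8-bit
-// channel expansion when it rescales the summed channels (psrlh/psllh/or).
-static inline int Expand5(int v) { return (v << 3) | (v >> 2); }  // 5 -> 8 bit
-static inline int Expand6(int v) { return (v << 2) | (v >> 4); }  // 6 -> 8 bit
-// With b, g, r the averages of the expanded channels over the 2x2 block
-// (this row plus next_rgb565), the pmaddhw dot products against mask_u and
-// mask_v evaluate:
-//   *dst_u++ = (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
-//   *dst_v++ = (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
-// ARGB1555ToUVRow_MMI and ARGB4444ToUVRow_MMI below follow the same pattern
-// and differ only in how the channel bits are extracted and rescaled.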
-
-void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
- int src_stride_argb1555,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[11];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- __asm__ volatile(
- "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest0_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest1_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest2_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest3_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
- "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
- "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555),
- [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
- [two] "f"(0x02), [one] "f"(0x01)
- : "memory");
-}
-
-void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
- int src_stride_argb4444,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[13];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x000f000f000f000f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest0_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest0_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest1_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest1_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest2_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest2_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest3_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest3_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t"
- "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_argb4444] "r"(src_argb4444),
- [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u),
- [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04),
- [two] "f"(0x02)
- : "memory");
-}
-
-void ARGBToUV444Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
- [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
- [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
- [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
- [dest3_v] "=&f"(ftmp[11])
- : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),
- [eight] "f"(0x08)
- : "memory");
-}
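-
-// A scalar sketch of the per-pixel math the ARGBToUV444Row_MMI loop above
-// evaluates with pmaddhw against mask_u and mask_v; the inserted 0x4040
-// halfword supplies the 0x8080 bias after the multiply by 2. The asm handles
-// 8 pixels per iteration and relies on the caller to pad width accordingly.
-static void ARGBToUV444Row_Sketch(const uint8_t* src_argb,
-                                  uint8_t* dst_u,
-                                  uint8_t* dst_v,
-                                  int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int b = src_argb[0], g = src_argb[1], r = src_argb[2];  // BGRA in memory.
-    dst_u[i] = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
-    dst_v[i] = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
-    src_argb += 4;
-  }
-}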
-
-void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
- uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x01;
- const uint64_t mask2 = 0x0080004D0096001DULL;
- const uint64_t mask3 = 0xFF000000FF000000ULL;
- const uint64_t mask4 = ~mask3;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "and %[src37], %[src], %[mask3] \n\t"
-
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
- "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"
-
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
- "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
- "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask4] \n\t"
- "or %[dest], %[dest], %[src37] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
- [src37] "=&f"(src37)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
- : "memory");
-}
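-
-// A scalar sketch of the gray value computed above: a BT.601-style weighted
-// sum with +128 rounding (the halfwords of mask2 are 29, 150, 77, 128, and
-// mask1 inserts the constant 1 that pairs with the 128), with the source
-// alpha byte carried through via the mask3/src37 path.
-static void ARGBGrayRow_Sketch(const uint8_t* src_argb,
-                               uint8_t* dst_argb,
-                               int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int b = src_argb[0], g = src_argb[1], r = src_argb[2];
-    uint8_t y = (uint8_t)((29 * b + 150 * g + 77 * r + 128) >> 8);
-    dst_argb[0] = dst_argb[1] = dst_argb[2] = y;
-    dst_argb[3] = src_argb[3];  // Alpha is preserved.
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}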
-
-// Convert a row of an image to sepia tone.
-void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
- uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x002300440011ULL;
- const uint64_t mask2 = 0x002D00580016ULL;
- const uint64_t mask3 = 0x003200620018ULL;
- const uint64_t mask4 = 0xFF000000FF000000ULL;
- const uint64_t shift = 0x07;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "and %[dest37], %[dest], %[mask4] \n\t"
-
- "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
- "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
- "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
- "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
-
- "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
- "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
- "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
- "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "or %[dest], %[dest], %[dest37] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
- [dest] "=&f"(dest)
- : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
- [mask4] "f"(mask4), [shift] "f"(shift)
- : "memory");
-}
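-
-// A scalar sketch of the sepia weights encoded in mask1, mask2 and mask3
-// above; packushb supplies the saturation to 255, and the alpha bytes kept
-// in dest37 are OR'd back unchanged.
-static void ARGBSepiaRow_Sketch(uint8_t* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int b = dst_argb[0], g = dst_argb[1], r = dst_argb[2];
-    int sb = (b * 17 + g * 68 + r * 35) >> 7;
-    int sg = (b * 22 + g * 88 + r * 45) >> 7;
-    int sr = (b * 24 + g * 98 + r * 50) >> 7;
-    dst_argb[0] = (uint8_t)(sb > 255 ? 255 : sb);
-    dst_argb[1] = (uint8_t)(sg > 255 ? 255 : sg);
-    dst_argb[2] = (uint8_t)(sr > 255 ? 255 : sr);
-    dst_argb += 4;  // Alpha (byte 3) is untouched.
-  }
-}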
-
-// Apply a color matrix to a row of an image. The matrix is signed.
-// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- const int8_t* matrix_argb,
- int width) {
- uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
- dest3;
- uint64_t matrix, matrix_hi, matrix_lo;
- uint64_t tmp0, tmp1;
- const uint64_t shift0 = 0x06;
- const uint64_t shift1 = 0x08;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
-
- "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest0], %[dest0], %[shift0] \n\t"
-
- "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest1], %[dest1], %[shift0] \n\t"
-
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-
- "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest2], %[dest2], %[shift0] \n\t"
-
- "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest3], %[dest3], %[shift0] \n\t"
-
- "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
- "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
- "packushb %[dest], %[tmp0], %[tmp1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
- [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
- [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
- : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb),
- [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0),
- [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1)
- : "memory");
-}
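-
-// A scalar sketch of the dot product above: the psllh/psrah pair sign-extends
-// the int8 matrix coefficients to 16 bits, psraw matches the >> 6, and the
-// packsswh/packushb pair clamps the result to [0, 255].
-static void ARGBColorMatrixRow_Sketch(const uint8_t* src_argb,
-                                      uint8_t* dst_argb,
-                                      const int8_t* matrix_argb,
-                                      int width) {
-  int i, j;
-  for (i = 0; i < width; ++i) {
-    int b = src_argb[0], g = src_argb[1], r = src_argb[2], a = src_argb[3];
-    for (j = 0; j < 4; ++j) {  // Output channel j uses matrix row j.
-      const int8_t* m = matrix_argb + j * 4;
-      int v = (b * m[0] + g * m[1] + r * m[2] + a * m[3]) >> 6;
-      dst_argb[j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
-    }
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}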
-
-void ARGBShadeRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- int width,
- uint32_t value) {
- uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
- const uint64_t shift = 0x08;
-
-  __asm__ volatile(
-      // Widen each shade channel to 16 bits once, before the loop: doing it
-      // per iteration would re-widen the already-widened value and corrupt
-      // the scale factors after the first two pixels.
-      "punpcklbh  %[value],        %[value],          %[value]       \n\t"
-
-      "1:                                                            \n\t"
-      "gsldlc1    %[src],          0x07(%[src_ptr])                  \n\t"
-      "gsldrc1    %[src],          0x00(%[src_ptr])                  \n\t"
-      "punpcklbh  %[src_lo],       %[src],            %[src]         \n\t"
-      "punpckhbh  %[src_hi],       %[src],            %[src]         \n\t"
-
- "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
- [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [value] "f"(value), [shift] "f"(shift)
- : "memory");
-}
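-
-// A scalar sketch of the shade math above: pixel and shade value are each
-// widened to 16 bits by byte replication (x * 0x0101), pmulhuh keeps the
-// high 16 bits of the product, and the final psrlh by 8 leaves a net >> 24.
-static void ARGBShadeRow_Sketch(const uint8_t* src_argb,
-                                uint8_t* dst_argb,
-                                int width,
-                                uint32_t value) {
-  int i, j;
-  for (i = 0; i < width; ++i) {
-    for (j = 0; j < 4; ++j) {
-      uint32_t v = (value >> (8 * j)) & 0xff;
-      dst_argb[j] = (uint8_t)((src_argb[j] * 0x0101u * (v * 0x0101u)) >> 24);
-    }
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}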
-
-void ARGBMultiplyRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
- uint64_t dest, dest_lo, dest_hi;
- const uint64_t mask = 0x0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
- "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"
-
- "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
- "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
- [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
- : "memory");
-}
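-
-// A scalar sketch of the multiply above: src_argb is widened by replication
-// (punpcklbh with itself), src_argb1 is zero-extended, and pmulhuh keeps the
-// high 16 bits of the product, i.e. (a * 0x0101 * b) >> 16.
-static void ARGBMultiplyRow_Sketch(const uint8_t* src_argb,
-                                   const uint8_t* src_argb1,
-                                   uint8_t* dst_argb,
-                                   int width) {
-  int i;
-  for (i = 0; i < 4 * width; ++i) {
-    dst_argb[i] = (uint8_t)((src_argb[i] * 0x0101u * src_argb1[i]) >> 16);
-  }
-}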
-
-void ARGBAddRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "paddusb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width)
- : "memory");
-}
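-
-// The add above is a per-byte saturating add (paddusb); ARGBSubtractRow_MMI
-// below is the same loop with psubusb, a per-byte subtract clamped at 0.
-// A scalar sketch:
-static void ARGBAddRow_Sketch(const uint8_t* src_argb,
-                              const uint8_t* src_argb1,
-                              uint8_t* dst_argb,
-                              int width) {
-  int i;
-  for (i = 0; i < 4 * width; ++i) {
-    int s = src_argb[i] + src_argb1[i];
-    dst_argb[i] = (uint8_t)(s > 255 ? 255 : s);
-  }
-}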
-
-void ARGBSubtractRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "psubusb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width)
- : "memory");
-}
-
-// Sobel functions which mimic SSSE3.
-void SobelXRow_MMI(const uint8_t* src_y0,
- const uint8_t* src_y1,
- const uint8_t* src_y2,
- uint8_t* dst_sobelx,
- int width) {
- uint64_t y00 = 0, y10 = 0, y20 = 0;
- uint64_t y02 = 0, y12 = 0, y22 = 0;
- uint64_t zero = 0x0;
- uint64_t sobel = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
- "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2]
- "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i]
- "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2]
- "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-
- "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i]
- "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
- "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2]
- "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y20], %[y20], %[zero] \n\t"
-
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
- "punpcklbh %[y22], %[y22], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y10] \n\t" // a+b
- "paddh %[y20], %[y20], %[y10] \n\t" // c+b
- "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c
-
- "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub
- "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub
- "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub
-
- "pmaxsh %[y10], %[y00], %[y02] \n\t"
- "pminsh %[y20], %[y00], %[y02] \n\t"
- "psubh %[sobel], %[y10], %[y20] \n\t" // Abs
-
- "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
- "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
- "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
- "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
- "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-
- "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
- "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
- "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
- "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y20], %[y20], %[zero] \n\t"
-
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
- "punpcklbh %[y22], %[y22], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y10] \n\t"
- "paddh %[y20], %[y20], %[y10] \n\t"
- "paddh %[y00], %[y00], %[y20] \n\t"
-
- "paddh %[y02], %[y02], %[y12] \n\t"
- "paddh %[y22], %[y22], %[y12] \n\t"
- "paddh %[y02], %[y02], %[y22] \n\t"
-
- "pmaxsh %[y10], %[y00], %[y02] \n\t"
- "pminsh %[y20], %[y00], %[y02] \n\t"
- "psubh %[y00], %[y10], %[y20] \n\t"
-
- "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
- "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
- "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
-
- "daddiu %[src_y0], %[src_y0], 8 \n\t"
- "daddiu %[src_y1], %[src_y1], 8 \n\t"
- "daddiu %[src_y2], %[src_y2], 8 \n\t"
- "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
- [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
- : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
- [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
- : "memory");
-}
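-
-// A scalar sketch of the filter above: columns i and i+2 each get a 1-2-1
-// weighting across the three input rows, and the output is the clamped
-// absolute difference (pmaxsh/pminsh/psubh form the abs, packushb the
-// clamp). Like the asm, it reads two bytes beyond width. SobelYRow_MMI below
-// is the symmetric filter over rows instead of columns.
-static void SobelXRow_Sketch(const uint8_t* src_y0,
-                             const uint8_t* src_y1,
-                             const uint8_t* src_y2,
-                             uint8_t* dst_sobelx,
-                             int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int left = src_y0[i] + 2 * src_y1[i] + src_y2[i];
-    int right = src_y0[i + 2] + 2 * src_y1[i + 2] + src_y2[i + 2];
-    int sobel = left > right ? left - right : right - left;
-    dst_sobelx[i] = (uint8_t)(sobel > 255 ? 255 : sobel);
-  }
-}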
-
-void SobelYRow_MMI(const uint8_t* src_y0,
- const uint8_t* src_y1,
- uint8_t* dst_sobely,
- int width) {
- uint64_t y00 = 0, y01 = 0, y02 = 0;
- uint64_t y10 = 0, y11 = 0, y12 = 0;
- uint64_t zero = 0x0;
- uint64_t sobel = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
- "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
- "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1]
- "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2]
- "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i]
- "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
- "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1]
- "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2]
- "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y01], %[y01], %[zero] \n\t"
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
-
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y11], %[y11], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y01] \n\t" // a+b
- "paddh %[y02], %[y02], %[y01] \n\t" // c+b
- "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c
-
- "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub
- "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub
- "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub
-
- "pmaxsh %[y02], %[y00], %[y10] \n\t"
- "pminsh %[y12], %[y00], %[y10] \n\t"
- "psubh %[sobel], %[y02], %[y12] \n\t" // Abs
-
- "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
- "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
- "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
- "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
- "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
- "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
- "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
- "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
- "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y01], %[y01], %[zero] \n\t"
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
-
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y11], %[y11], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y01] \n\t"
- "paddh %[y02], %[y02], %[y01] \n\t"
- "paddh %[y00], %[y00], %[y02] \n\t"
-
- "paddh %[y10], %[y10], %[y11] \n\t"
- "paddh %[y12], %[y12], %[y11] \n\t"
- "paddh %[y10], %[y10], %[y12] \n\t"
-
- "pmaxsh %[y02], %[y00], %[y10] \n\t"
- "pminsh %[y12], %[y00], %[y10] \n\t"
- "psubh %[y00], %[y02], %[y12] \n\t"
-
- "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
- "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
- "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
-
- "daddiu %[src_y0], %[src_y0], 8 \n\t"
- "daddiu %[src_y1], %[src_y1], 8 \n\t"
- "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
- [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
- : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
- [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
- : "memory");
-}
-
-void SobelRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_argb,
- int width) {
-  uint64_t temp[3];
- uint64_t c1 = 0xff000000ff000000;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i]
- "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
- "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
- // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
- "paddusb %[t2] , %[t0], %[t1] \n\t"
-
- // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
- "punpcklbh %[t0], %[t2], %[t2] \n\t"
-
-      // s1 s1 s0 s0->s1 s1 s1 s1 s0 s0 s0 s0
- "punpcklbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
-      // 255 s1 s1 s1 255 s0 s0 s0
- "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
-
- // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
- "punpckhbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- // 255 s3 s3 s3 255 s2 s2 s2
- "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
-
- // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
- "punpckhbh %[t0], %[t2], %[t2] \n\t"
-
- // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
- "punpcklbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
-
- // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
- "punpckhbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
-
- "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
- : "memory");
-}
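-
-// A scalar sketch of the packing above: the two gradients are added with
-// byte saturation, replicated into B, G and R, and the c1 constant forces
-// alpha to 255. SobelToPlaneRow_MMI below stores the same saturated sum as a
-// single plane instead of ARGB.
-static void SobelRow_Sketch(const uint8_t* src_sobelx,
-                            const uint8_t* src_sobely,
-                            uint8_t* dst_argb,
-                            int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int s = src_sobelx[i] + src_sobely[i];
-    uint8_t v = (uint8_t)(s > 255 ? 255 : s);  // paddusb saturation.
-    dst_argb[0] = v;
-    dst_argb[1] = v;
-    dst_argb[2] = v;
-    dst_argb[3] = 255;
-    dst_argb += 4;
-  }
-}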
-
-void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_y,
- int width) {
- uint64_t tr = 0;
- uint64_t tb = 0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
- "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
- "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
- "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
- "paddusb %[tr], %[tr], %[tb] \n\t" // g
- "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
-
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [tr] "=&f"(tr), [tb] "=&f"(tb)
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_y] "r"(dst_y), [width] "r"(width)
- : "memory");
-}
-
-void SobelXYRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_argb,
- int width) {
- uint64_t temp[3];
- uint64_t result = 0;
- uint64_t gb = 0;
- uint64_t cr = 0;
- uint64_t c1 = 0xffffffffffffffff;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i]
- "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
- "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
- "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
- "paddusb %[tg] , %[tr], %[tb] \n\t" // g
-
- // g3 b3 g2 b2 g1 b1 g0 b0
- "punpcklbh %[gb], %[tb], %[tg] \n\t"
-      // c3 r3 c2 r2 c1 r1 c0 r0
- "punpcklbh %[cr], %[tr], %[c1] \n\t"
- // c1 r1 g1 b1 c0 r0 g0 b0
- "punpcklhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
- // c3 r3 g3 b3 c2 r2 g2 b2
- "punpckhhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
-
- // g7 b7 g6 b6 g5 b5 g4 b4
- "punpckhbh %[gb], %[tb], %[tg] \n\t"
- // c7 r7 c6 r6 c5 r5 c4 r4
- "punpckhbh %[cr], %[tr], %[c1] \n\t"
- // c5 r5 g5 b5 c4 r4 g4 b4
- "punpcklhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
- // c7 r7 g7 b7 c6 r6 g6 b6
- "punpckhhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
-
- "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]),
- [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result)
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
- : "memory");
-}
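-
-// A scalar sketch of the interleave above: sobely lands in B, the saturated
-// sum in G, sobelx in R, and the all-ones c1 constant supplies opaque alpha.
-static void SobelXYRow_Sketch(const uint8_t* src_sobelx,
-                              const uint8_t* src_sobely,
-                              uint8_t* dst_argb,
-                              int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int r = src_sobelx[i];
-    int b = src_sobely[i];
-    int g = r + b;
-    dst_argb[0] = (uint8_t)b;
-    dst_argb[1] = (uint8_t)(g > 255 ? 255 : g);
-    dst_argb[2] = (uint8_t)r;
-    dst_argb[3] = 255;
-    dst_argb += 4;
-  }
-}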
-
-void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
-  // Replicate each Y byte into B, G and R; alpha is forced to 255.
- uint64_t src, dest;
- const uint64_t mask0 = 0x00ffffff00ffffffULL;
- const uint64_t mask1 = ~mask0;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src], %[src], %[src] \n\t"
- "punpcklhw %[dest], %[src], %[src] \n\t"
- "and %[dest], %[dest], %[mask0] \n\t"
- "or %[dest], %[dest], %[mask1] \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-
- "punpckhhw %[dest], %[src], %[src] \n\t"
- "and %[dest], %[dest], %[mask0] \n\t"
- "or %[dest], %[dest], %[mask1] \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
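-
-// A scalar sketch of the expansion above: each Y byte becomes one opaque
-// gray ARGB pixel (mask0 keeps the replicated Y in B, G and R; mask1 forces
-// the alpha bytes to 0xff).
-static void J400ToARGBRow_Sketch(const uint8_t* src_y,
-                                 uint8_t* dst_argb,
-                                 int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    uint8_t y = src_y[i];
-    dst_argb[0] = y;
-    dst_argb[1] = y;
-    dst_argb[2] = y;
-    dst_argb[3] = 255;
-    dst_argb += 4;
-  }
-}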
-
-// TODO: respect YuvConstants (the argument is currently ignored and fixed
-// BT.601-style constants are used instead).
-void I400ToARGBRow_MMI(const uint8_t* src_y,
-                       uint8_t* rgb_buf,
-                       const struct YuvConstants*,
-                       int width) {
- uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x55;
- const uint64_t mask2 = 0xAA;
- const uint64_t mask3 = 0xFF;
- const uint64_t mask4 = 0x4A354A354A354A35ULL;
- const uint64_t mask5 = 0x0488048804880488ULL;
- const uint64_t shift0 = 0x08;
- const uint64_t shift1 = 0x06;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-
- "pshufh %[src], %[src_lo], %[mask0] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_lo], %[mask1] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_lo], %[mask2] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_lo], %[mask3] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_hi], %[mask0] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_hi], %[mask1] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_hi], %[mask2] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_hi], %[mask3] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo)
- : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
- [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0),
- [shift1] "f"(shift1), [width] "r"(width)
- : "memory");
-}
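-
-// The fixed-point luma expansion above in scalar form (an illustrative
-// sketch, not upstream code). The constants match the asm operands: byte
-// replication by 0x0101, high multiply by mask4 (0x4A35), bias subtraction
-// of mask5 (0x0488) and an arithmetic shift by 6, with packushb supplying
-// the final 0..255 clamp.
-static uint8_t I400Expand_Sketch(uint8_t y) {
-  int32_t v = (int32_t)(((uint32_t)y * 0x0101u * 0x4A35u) >> 16) - 0x0488;
-  v >>= 6;
-  return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
-}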
-
-void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, src0, src1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x1b;
-
- src += width - 1;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
- "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
- "punpcklbh %[src0], %[source], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask1] \n\t"
- "punpckhbh %[src1], %[source], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "packushb %[dest], %[src1], %[src0] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
- [src1] "=&f"(src1)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-void MirrorSplitUVRow_MMI(const uint8_t* src_uv,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src0, src1, dest0, dest1;
- const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
- const uint64_t mask1 = 0x1b;
- const uint64_t shift = 0x08;
-
- src_uv += (width - 1) << 1;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
- "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
-
- "and %[dest0], %[src0], %[mask0] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "and %[dest1], %[src1], %[mask0] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
-
- "psrlh %[dest0], %[src0], %[shift] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "psrlh %[dest1], %[src1], %[shift] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
- "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
- "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
- [src1] "=&f"(src1)
- : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v),
- [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [shift] "f"(shift)
- : "memory");
-}
-
-void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- src += (width - 1) * 4;
- uint64_t temp = 0x0;
- uint64_t shuff = 0x4e; // 01 00 11 10
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[temp], 3(%[src]) \n\t"
- "gsldrc1 %[temp], -4(%[src]) \n\t"
- "pshufh %[temp], %[temp], %[shuff] \n\t"
- "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
- "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
-
- "daddiu %[src], %[src], -0x08 \n\t"
- "daddiu %[dst], %[dst], 0x08 \n\t"
- "daddiu %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [temp] "=&f"(temp)
- : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff)
- : "memory");
-}
-
-void SplitUVRow_MMI(const uint8_t* src_uv,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
-  uint64_t c0 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
-
- "and %[t2], %[t0], %[c0] \n\t"
- "and %[t3], %[t1], %[c0] \n\t"
- "packushb %[t2], %[t2], %[t3] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
-
- "psrlh %[t2], %[t0], %[shift] \n\t"
- "psrlh %[t3], %[t1], %[shift] \n\t"
- "packushb %[t2], %[t2], %[t3] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
-
- "daddiu %[src_uv], %[src_uv], 16 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [t3] "=&f"(temp[3])
- : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
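-
-// Scalar view of the deinterleave above (sketch only, hypothetical name):
-// the c0 mask keeps the even bytes (U) and the psrlh by 8 exposes the odd
-// bytes (V); the vector loop handles 8 output pixels per iteration.
-static void SplitUVRow_Sketch(const uint8_t* src_uv, uint8_t* dst_u,
-                              uint8_t* dst_v, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    dst_u[x] = src_uv[2 * x];      // even bytes
-    dst_v[x] = src_uv[2 * x + 1];  // odd bytes
-  }
-}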
-
-void MergeUVRow_MMI(const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_uv,
- int width) {
- uint64_t temp[3];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
- "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
- "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
- "punpcklbh %[t2], %[t0], %[t1] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
- "punpckhbh %[t2], %[t0], %[t1] \n\t"
- "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
- "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
-
- "daddiu %[src_u], %[src_u], 8 \n\t"
- "daddiu %[src_v], %[src_v], 8 \n\t"
- "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
- : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [width] "r"(width)
- : "memory");
-}
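-
-// The interleave done by punpcklbh/punpckhbh above, as scalar C (sketch
-// only): U lands in the even output bytes and V in the odd ones.
-static void MergeUVRow_Sketch(const uint8_t* src_u, const uint8_t* src_v,
-                              uint8_t* dst_uv, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    dst_uv[2 * x + 0] = src_u[x];
-    dst_uv[2 * x + 1] = src_v[x];
-  }
-}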
-
-void SplitRGBRow_MMI(const uint8_t* src_rgb,
- uint8_t* dst_r,
- uint8_t* dst_g,
- uint8_t* dst_b,
- int width) {
- uint64_t src[4];
- uint64_t dest_hi, dest_lo, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
- "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
-
- "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
- "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
- "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
- "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
- [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g),
- [dstb_ptr] "r"(dst_b), [width] "r"(width)
- : "memory");
-}
-
-void MergeRGBRow_MMI(const uint8_t* src_r,
- const uint8_t* src_g,
- const uint8_t* src_b,
- uint8_t* dst_rgb,
- int width) {
- uint64_t srcr, srcg, srcb, dest;
- uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
- const uint64_t temp = 0x0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
- "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
- "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
- "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
- "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
- "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
-
- "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
- "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
- "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
- "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
-
- "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
- "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
- "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
- "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
- "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
- "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
- "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
-
- "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
- "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
- "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
- [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
- [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
- [srcbz_lo] "=&f"(srcbz_lo)
- : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b),
- [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp)
- : "memory");
-}
-
-// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
- int src_stride_yuy2,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
-  uint64_t c0 = 0xff00ff00ff00ff00ULL;
-  uint64_t c1 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[3];
- uint64_t data[4];
- uint64_t shift = 0x08;
- uint64_t src_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
- "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c1] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
- "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c1] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
- [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
- : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
- : "memory");
-}
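-
-// Scalar sketch of the 420 chroma filtering above (not upstream code,
-// hypothetical name): pavgb is a rounding byte average of the two source
-// rows; the masks and shifts then pick the U and V bytes out of the
-// Y0 U Y1 V packing.
-static void YUY2ToUVRow_Sketch(const uint8_t* src_yuy2, int stride,
-                               uint8_t* dst_u, uint8_t* dst_v, int width) {
-  const uint8_t* row1 = src_yuy2 + stride;
-  int x;
-  for (x = 0; x < width; x += 2) {
-    dst_u[0] = (uint8_t)((src_yuy2[1] + row1[1] + 1) >> 1);  // rounding avg
-    dst_v[0] = (uint8_t)((src_yuy2[3] + row1[3] + 1) >> 1);
-    src_yuy2 += 4;
-    row1 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}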
-
-// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
-  uint64_t c0 = 0xff00ff00ff00ff00ULL;
-  uint64_t c1 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[2];
- uint64_t data[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c1] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c1] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
- [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
-  uint64_t c0 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[2];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
- : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width),
- [c0] "f"(c0)
- : "memory");
-}
-
-// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
- int src_stride_uyvy,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- // Output a row of UV values.
-  uint64_t c0 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[3];
- uint64_t data[4];
- uint64_t shift = 0x08;
- uint64_t src_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
- "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c0] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
- "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c0] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
- [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
- : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- // Output a row of UV values.
-  uint64_t c0 = 0x00ff00ff00ff00ffULL;
- uint64_t temp[2];
- uint64_t data[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c0] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c0] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
- [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
- // Output a row of Y values.
-  uint64_t c0 = 0x00ff00ff00ff00ffULL;
- uint64_t shift = 0x08;
- uint64_t temp[2];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
- "dsrl %[t0], %[t0], %[shift] \n\t"
- "dsrl %[t1], %[t1], %[shift] \n\t"
-      "and         %[t0],        %[t0],           %[c0]       \n\t"
-      "and         %[t1],        %[t1],           %[c0]       \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
- : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width),
- [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Blend src_argb over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb or src_argb1.
-// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
- dest_lo;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;
- const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
- const uint64_t mask3 = 0xFF;
- const uint64_t mask4 = ~mask1;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
-
- "psubush %[alpha], %[mask2], %[src0_lo] \n\t"
- "pshufh %[alpha], %[alpha], %[mask3] \n\t"
- "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t"
-
- "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-
- "psubush %[alpha], %[mask2], %[src0_hi] \n\t"
- "pshufh %[alpha], %[alpha], %[mask3] \n\t"
- "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[mask4] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
- [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4),
- [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
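-
-// Per-channel scalar form of the blend above (sketch): with a = foreground
-// alpha, dst = fg + ((bg * (255 - a)) >> 8), saturated to 255, and the
-// result alpha is forced opaque -- the same math as the SSSE3 row this
-// mimics.
-static uint8_t BlendChannel_Sketch(uint8_t fg, uint8_t bg, uint8_t a) {
-  uint32_t v = fg + ((bg * (255u - a)) >> 8);
-  return (uint8_t)(v > 255 ? 255 : v);
-}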
-
-void BlendPlaneRow_MMI(const uint8_t* src0,
- const uint8_t* src1,
- const uint8_t* alpha,
- uint8_t* dst,
- int width) {
- uint64_t source0, source1, dest, alph;
- uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
- dest_lo;
- uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;
- const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
- "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-
- "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t"
- "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t"
- "psubusb %[alpha_r], %[mask1], %[alpha] \n\t"
- "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t"
- "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t"
- "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t"
- "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t"
-
- "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t"
- "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[dest] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-
- "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t"
- "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[dest] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
- [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
- [alpha_r] "=&f"(alpha_rev)
- : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha),
- [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
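-
-// Scalar form of the plane blend above (sketch): mask2 supplies the 0x00ff
-// per-lane rounding term that is added before the shift.
-static uint8_t BlendPlanePixel_Sketch(uint8_t s0, uint8_t s1, uint8_t a) {
-  return (uint8_t)((s0 * a + s1 * (255 - a) + 255) >> 8);
-}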
-
-// Multiply source RGB by alpha and store to destination.
-// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- int width) {
- uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
- const uint64_t mask0 = 0xFF;
- const uint64_t mask1 = 0xFF000000FF000000ULL;
- const uint64_t mask2 = ~mask1;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[src] \n\t"
- "punpckhbh %[src_hi], %[src], %[src] \n\t"
-
- "pshufh %[alpha], %[src_lo], %[mask0] \n\t"
- "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "pshufh %[alpha], %[src_hi], %[mask0] \n\t"
- "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask2] \n\t"
- "and %[src], %[src], %[mask1] \n\t"
- "or %[dest], %[dest], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift),
- [width] "r"(width)
- : "memory");
-}
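-
-// What the punpcklbh/pmulhuh/psrlh sequence above computes per channel
-// (sketch): both the channel and its alpha are byte-replicated (x * 0x0101)
-// before the multiply, so the result is approximately c * a / 255 without
-// a divide.
-static uint8_t Attenuate_Sketch(uint8_t c, uint8_t a) {
-  return (uint8_t)((((uint32_t)c * 0x0101u) * ((uint32_t)a * 0x0101u)) >> 24);
-}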
-
-void ComputeCumulativeSumRow_MMI(const uint8_t* row,
- int32_t* cumsum,
- const int32_t* previous_cumsum,
- int width) {
- int64_t row_sum[2] = {0, 0};
- uint64_t src, dest0, dest1, presrc0, presrc1, dest;
- const uint64_t mask = 0x0;
-
- __asm__ volatile(
- "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t"
- "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t"
-
- "punpcklbh %[src], %[src], %[mask] \n\t"
- "punpcklhw %[dest0], %[src], %[mask] \n\t"
- "punpckhhw %[dest1], %[src], %[mask] \n\t"
-
- "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t"
- "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t"
-
- "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t"
- "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t"
- "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t"
- "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t"
-
- "paddw %[dest0], %[row_sum0], %[presrc0] \n\t"
- "paddw %[dest1], %[row_sum1], %[presrc1] \n\t"
-
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t"
- "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x01 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]),
- [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0),
- [presrc1] "=&f"(presrc1)
- : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum),
- [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask)
- : "memory");
-}
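-
-// Scalar statement of the recurrence above (sketch, hypothetical name):
-// each output is the running sum of this row's channel values plus the
-// same position in the previous cumulative row; the asm handles one
-// 4-channel pixel per iteration.
-static void CumulativeSumRow_Sketch(const uint8_t* row, int32_t* cumsum,
-                                    const int32_t* previous_cumsum,
-                                    int width) {
-  int32_t sum[4] = {0, 0, 0, 0};
-  int x, i;
-  for (x = 0; x < width; ++x) {
-    for (i = 0; i < 4; ++i) {
-      sum[i] += row[4 * x + i];
-      cumsum[4 * x + i] = sum[i] + previous_cumsum[4 * x + i];
-    }
-  }
-}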
-
-// Blend 2 rows into 1 using source_y_fraction; fractions 0 and 128 take fast paths.
-void InterpolateRow_MMI(uint8_t* dst_ptr,
- const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- int width,
- int source_y_fraction) {
- if (source_y_fraction == 0) {
- __asm__ volatile(
- "1: \n\t"
- "ld $t0, 0x0(%[src_ptr]) \n\t"
- "sd $t0, 0x0(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- :
- : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width)
- : "memory");
- return;
- }
- if (source_y_fraction == 128) {
- uint64_t uv = 0x0;
- uint64_t uv_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
- "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
- "daddu $t0, %[src_ptr], %[stride] \n\t"
- "gsldrc1 %[uv_stride], 0x0($t0) \n\t"
- "gsldlc1 %[uv_stride], 0x7($t0) \n\t"
-
- "pavgb %[uv], %[uv], %[uv_stride] \n\t"
- "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
- "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width),
- [stride] "r"((int64_t)src_stride)
- : "memory");
- return;
- }
- const uint8_t* src_ptr1 = src_ptr + src_stride;
- uint64_t temp;
- uint64_t data[4];
- uint64_t zero = 0x0;
-  uint64_t c0 = 0x0080008000800080ULL;
-  uint64_t fy0 = 0x0100010001000100ULL;
- uint64_t shift = 0x8;
- __asm__ volatile(
- "pshufh %[fy1], %[fy1], %[zero] \n\t"
- "psubh %[fy0], %[fy0], %[fy1] \n\t"
- "1: \n\t"
- "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
- "punpcklbh %[d0], %[t0], %[zero] \n\t"
- "punpckhbh %[d1], %[t0], %[zero] \n\t"
- "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
- "punpcklbh %[d2], %[t0], %[zero] \n\t"
- "punpckhbh %[d3], %[t0], %[zero] \n\t"
-
- "pmullh %[d0], %[d0], %[fy0] \n\t"
- "pmullh %[d2], %[d2], %[fy1] \n\t"
- "paddh %[d0], %[d0], %[d2] \n\t"
- "paddh %[d0], %[d0], %[c0] \n\t"
- "psrlh %[d0], %[d0], %[shift] \n\t"
-
- "pmullh %[d1], %[d1], %[fy0] \n\t"
- "pmullh %[d3], %[d3], %[fy1] \n\t"
- "paddh %[d1], %[d1], %[d3] \n\t"
- "paddh %[d1], %[d1], %[c0] \n\t"
- "psrlh %[d1], %[d1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d1] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
- [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1),
- [dst_ptr] "r"(dst_ptr), [width] "r"(width),
- [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0),
- [shift] "f"(shift), [zero] "f"(zero)
- : "memory");
-}
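-
-// The general case above in scalar form (sketch): a 256-step linear blend
-// with the 0x0080 rounding constant c0, consistent with the two fast paths
-// (fraction 0 is a copy, fraction 128 a pavgb rounding average).
-static uint8_t Interpolate_Sketch(uint8_t p0, uint8_t p1, int fraction) {
-  return (uint8_t)((p0 * (256 - fraction) + p1 * fraction + 128) >> 8);
-}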
-
-// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- const uint8_t* shuffler,
- int width) {
- uint64_t source, dest0, dest1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
- ((shuffler[2] & 0x03) << 4) |
- ((shuffler[3] & 0x03) << 6);
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest0], %[src], %[mask0] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "punpckhbh %[dest1], %[src], %[mask0] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest], %[dest0], %[dest1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
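-
-// Scalar equivalent of the pshufh-based shuffle above (sketch): output
-// channel i takes input channel shuffler[i]; only the low 2 bits of each
-// of the first four shuffler entries contribute to mask1.
-static void ARGBShuffleRow_Sketch(const uint8_t* src_argb, uint8_t* dst_argb,
-                                  const uint8_t* shuffler, int width) {
-  int x, i;
-  for (x = 0; x < width; ++x) {
-    for (i = 0; i < 4; ++i) {
-      dst_argb[4 * x + i] = src_argb[4 * x + (shuffler[i] & 3)];
-    }
-  }
-}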
-
-void I422ToYUY2Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_frame,
- int width) {
- uint64_t temp[3];
- uint64_t vu = 0x0;
- __asm__ volatile(
- "1: \n\t"
-      "gsldlc1    %[ty],        0x7(%[src_y])               \n\t"  // load 8 Y
-      "gsldrc1    %[ty],        0x0(%[src_y])               \n\t"
-      "gslwlc1    %[tu],        0x3(%[src_u])               \n\t"  // load 4 U
-      "gslwrc1    %[tu],        0x0(%[src_u])               \n\t"
-      "gslwlc1    %[tv],        0x3(%[src_v])               \n\t"  // load 4 V
-      "gslwrc1    %[tv],        0x0(%[src_v])               \n\t"
-      "punpcklbh  %[vu],        %[tu],          %[tv]       \n\t"  // interleave: u0v0u1v1...
-      "punpcklbh  %[tu],        %[ty],          %[vu]       \n\t"  // YUYV, low 4 pixels
-      "gssdlc1    %[tu],        0x7(%[dst_frame])           \n\t"
-      "gssdrc1    %[tu],        0x0(%[dst_frame])           \n\t"
-      "punpckhbh  %[tu],        %[ty],          %[vu]       \n\t"  // YUYV, high 4 pixels
- "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
- "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
- "daddiu %[src_y], %[src_y], 8 \n\t"
- "daddiu %[src_u], %[src_u], 4 \n\t"
- "daddiu %[src_v], %[src_v], 4 \n\t"
- "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
-      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
- [vu] "=&f"(vu)
- : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [dst_frame] "r"(dst_frame), [width] "r"(width)
- : "memory");
-}
-
-void I422ToUYVYRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_frame,
- int width) {
- uint64_t temp[3];
- uint64_t vu = 0x0;
- __asm__ volatile(
- "1: \n\t"
-      "gsldlc1    %[ty],        0x7(%[src_y])               \n\t"  // load 8 Y
-      "gsldrc1    %[ty],        0x0(%[src_y])               \n\t"
-      "gslwlc1    %[tu],        0x3(%[src_u])               \n\t"  // load 4 U
-      "gslwrc1    %[tu],        0x0(%[src_u])               \n\t"
-      "gslwlc1    %[tv],        0x3(%[src_v])               \n\t"  // load 4 V
-      "gslwrc1    %[tv],        0x0(%[src_v])               \n\t"
-      "punpcklbh  %[vu],        %[tu],          %[tv]       \n\t"  // interleave: u0v0u1v1...
-      "punpcklbh  %[tu],        %[vu],          %[ty]       \n\t"  // UYVY, low 4 pixels
-      "gssdlc1    %[tu],        0x7(%[dst_frame])           \n\t"
-      "gssdrc1    %[tu],        0x0(%[dst_frame])           \n\t"
-      "punpckhbh  %[tu],        %[vu],          %[ty]       \n\t"  // UYVY, high 4 pixels
- "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
- "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
- "daddiu %[src_y], %[src_y], 8 \n\t"
- "daddiu %[src_u], %[src_u], 4 \n\t"
- "daddiu %[src_v], %[src_v], 4 \n\t"
- "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
-      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
- [vu] "=&f"(vu)
- : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [dst_frame] "r"(dst_frame), [width] "r"(width)
- : "memory");
-}
-
-void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, dest;
- const uint64_t mask0 = 0xff000000ff000000ULL;
- const uint64_t mask1 = ~mask0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "and %[src], %[src], %[mask0] \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[src], %[dest] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_a,
- int width) {
- uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
- const uint64_t mask = 0xff000000ff000000ULL;
- const uint64_t shift = 0x18;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "and %[dest0], %[src], %[mask] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "and %[dest1], %[src], %[mask] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
- "and %[dest0], %[src], %[mask] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
- "and %[dest1], %[src], %[mask] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask),
- [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
-
-void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, dest0, dest1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00ffffff00ffffffULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest0], %[mask0], %[src] \n\t"
- "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "punpckhbh %[dest0], %[mask0], %[src] \n\t"
- "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
- "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-void I444ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-  __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
-    "or          %[ub],         %[ub],            %[mask]      \n\t"  // must sign-extend ub
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
-    "or          %[vr],         %[vr],            %[mask]      \n\t"  // sign-extend vr
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- "punpcklbh %[u], %[u], %[zero] \n\t"//u
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- "punpcklbh %[v], %[v], %[zero] \n\t"//v
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
-
-// Also used for 420
-void I422ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
-    "or          %[ub],         %[ub],            %[mask]      \n\t"  // must sign-extend ub
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
-    "or          %[vr],         %[vr],            %[mask]      \n\t"  // sign-extend vr
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"//v
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
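-
-// Per-pixel fixed-point flow of the conversion above (an illustrative
-// sketch, not upstream code; names mirror the asm operands, with values
-// taken from the YuvConstants loads in the prologue). The or-with-mask
-// trick makes the 16-bit modular multiplies behave as signed:
-//
-//   y1 = (y * 0x0101 * yg) >> 16
-//   B  = ClampShift6_Sketch(y1 + bb - u * ub)
-//   G  = ClampShift6_Sketch(y1 + bg - u * ug - v * vg)
-//   R  = ClampShift6_Sketch(y1 + br - v * vr)
-//
-// and the four bytes are packed to memory as B, G, R, 0xFF.
-static uint8_t ClampShift6_Sketch(int32_t v) {
-  v >>= 6;
-  return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
-}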
-
-// 10 bit YUV to ARGB
-void I210ToARGBRow_MMI(const uint16_t* src_y,
- const uint16_t* src_u,
- const uint16_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "psllh %[y], %[y], %[six] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "punpcklhw %[u], %[u], %[u] \n\t"
- "psrah %[u], %[u], %[two] \n\t"
- "punpcklhw %[v], %[v], %[v] \n\t"
- "psrah %[v], %[v], %[two] \n\t"
- "pminsh %[u], %[u], %[mask1] \n\t"
- "pminsh %[v], %[v], %[mask1] \n\t"
-
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
-
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
-
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask), [two]"f"(0x02),
-      [mask1]"f"(0x00ff00ff00ff00ffULL)
- : "memory"
- );
-}
-
-void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- const uint8_t* src_a,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v, a;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
- "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t"
- "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[a] \n\t"
- "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v), [a]"=&f"(a),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [a_ptr]"r"(src_a), [zero]"f"(0x00),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
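
The kernels above and below all evaluate the same fixed-point conversion: scale Y by yg in the 16-bit high half (pmulhuh), add the per-channel bias, subtract the chroma terms, then arithmetic-shift right by 6 and saturate. A minimal scalar sketch of that math, ignoring the intermediate 16-bit saturation of paddsh/psubsh; the names clamp255 and YuvPixelSketch are illustrative, not part of this file:

#include <stdint.h>

/* Saturate an intermediate result to 0..255, as packushb does. */
static uint8_t clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One pixel of the fixed-point YUV->RGB math; ub..yg stand for the
 * 16-bit factors the asm loads from yuvconstants at 0x00..0xc0. */
static void YuvPixelSketch(uint8_t y, uint8_t u, uint8_t v,
                           uint8_t* b, uint8_t* g, uint8_t* r,
                           int ub, int ug, int vg, int vr,
                           int bb, int bg, int br, int yg) {
  int y1 = (int)(((uint32_t)y * 0x0101 * yg) >> 16); /* pmulhuh step */
  *b = clamp255((y1 + bb - u * ub) >> 6);
  *g = clamp255((y1 + bg - u * ug - v * vg) >> 6);
  *r = clamp255((y1 + br - v * vr) >> 6);
}

The later sketches in these notes assume the same <stdint.h> types.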
-
-void I422ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
- "packushb %[g_vec0], %[g_vec0], %[zero] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
-
- "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t"
- "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t"
- "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t"
- "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t"
- "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t"
- "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "pextrh %[r_vec1], %[g_vec1], %[one] \n\t"
- "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t"
- "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t"
- "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask]"f"(mask),
- [lmove1]"f"(0x18), [rmove1]"f"(0x8),
- [one]"f"(0x1)
- : "memory"
- );
-}
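
I422ToRGB24Row computes the same ARGB result and then squeezes out the alpha byte with the pextrh/pinsrh shuffle, writing 12 bytes per 4 pixels (note the 0x0c pointer step versus 0x10 for ARGB). An illustrative repack; PackARGBToRGB24Sketch is a made-up name, not this file's API:

/* Repack 4 ARGB pixels (16 bytes, B,G,R,A order in memory) into 12
 * bytes of RGB24 by dropping every fourth byte. */
static void PackARGBToRGB24Sketch(const uint8_t argb[16], uint8_t rgb24[12]) {
  for (int i = 0; i < 4; ++i) {
    rgb24[3 * i + 0] = argb[4 * i + 0]; /* B */
    rgb24[3 * i + 1] = argb[4 * i + 1]; /* G */
    rgb24[3 * i + 2] = argb[4 * i + 2]; /* R */
  }
}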
-
-void I422ToARGB4444Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_argb4444,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "and %[g_vec], %[g_vec], %[mask1] \n\t"
- "psrlw %[g_vec], %[g_vec], %[four] \n\t"
- "psrlw %[r_vec], %[g_vec], %[four] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
- "and %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "and %[b_vec], %[b_vec], %[mask1] \n\t"
- "psrlw %[b_vec], %[b_vec], %[four] \n\t"
- "psrlw %[r_vec], %[b_vec], %[four] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
- "and %[b_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
- [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
- [alpha]"f"(-1)
- : "memory"
- );
-}
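
The mask1/four shift sequence above keeps the top nibble of each channel. Per pixel the pack is equivalent to the sketch below; ToARGB4444Sketch is an illustrative name, and the kernel always writes opaque alpha since I422 carries no alpha plane:

/* Quantize one B,G,R,A quadruple to 16-bit ARGB4444, i.e.
 * pixel = (b>>4) | (g>>4)<<4 | (r>>4)<<8 | (a>>4)<<12. */
static uint16_t ToARGB4444Sketch(uint8_t b, uint8_t g, uint8_t r, uint8_t a) {
  return (uint16_t)((b >> 4) | (g & 0xf0) | ((uint16_t)(r & 0xf0) << 4) |
                    ((uint16_t)(a & 0xf0) << 8));
}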
-
-void I422ToARGB1555Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_argb1555,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlw %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "or %[g_vec], %[g_vec], %[mask3] \n\t"
-
- "psrlw %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "or %[b_vec], %[b_vec], %[mask3] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [mask3]"f"(0x800000008000),
- [lmove5]"f"(0x5)
- : "memory"
- );
-}
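
ARGB1555 keeps 5 bits per color plus a single alpha bit, which the code forces on by ORing in mask3 (0x8000 per pixel). A per-pixel sketch under that reading, with an illustrative name:

static uint16_t ToARGB1555Sketch(uint8_t b, uint8_t g, uint8_t r) {
  /* 5 bits per channel; alpha bit forced to 1, as the asm does. */
  return (uint16_t)((b >> 3) | ((g >> 3) << 5) | ((r >> 3) << 10) | 0x8000);
}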
-
-void I422ToRGB565Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_rgb565,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [seven]"f"(0x7),
- [lmove5]"f"(0x5)
- : "memory"
- );
-}
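
RGB565 allocates 5-6-5 bits to B, G and R; the paddb arithmetic on the constant registers above (3 + 6 = 9, 3 + 8 = 11) just builds the larger shift counts in-register instead of passing extra "f" inputs. The per-pixel quantization these kernels produce, as a sketch:

static uint16_t ToRGB565Sketch(uint8_t b, uint8_t g, uint8_t r) {
  return (uint16_t)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
}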
-
-void NV12ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1)
- : "memory"
- );
-}
-
-void NV21ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[ushu] \n\t"
- "pshufh %[u], %[u], %[vshu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1)
- : "memory"
- );
-}
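
The NV12 and NV21 bodies are identical except for which byte of the interleaved chroma plane is U: the two kernels simply swap the ushu/vshu shuffle masks. An illustrative split, where SplitUVSketch is a made-up helper:

/* Expand one 4-byte interleaved chroma group into per-pixel U/V for
 * 4 Y samples; the pshufh masks do this in-register in the asm. */
static void SplitUVSketch(const uint8_t uv[4], uint8_t u4[4], uint8_t v4[4],
                          int is_nv21) {
  for (int i = 0; i < 4; ++i) {
    u4[i] = uv[(i / 2) * 2 + (is_nv21 ? 1 : 0)];
    v4[i] = uv[(i / 2) * 2 + (is_nv21 ? 0 : 1)];
  }
}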
-
-void NV12ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
- "psllw %[temp], %[r_vec], %[lmove1] \n\t"
- "or %[g_vec], %[g_vec], %[temp] \n\t"
- "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
- "pextrh %[temp], %[temp], %[zero] \n\t"
- "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[zero] \n\t"
- "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[one] \n\t"
- "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
- "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
- "or %[b_vec], %[b_vec], %[temp] \n\t"
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [lmove1]"f"(0x18),
- [one]"f"(0x1), [rmove1]"f"(0x8)
- : "memory"
- );
-}
-
-void NV21ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[ushu] \n\t"
- "pshufh %[u], %[u], %[vshu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
- "psllw %[temp], %[r_vec], %[lmove1] \n\t"
- "or %[g_vec], %[g_vec], %[temp] \n\t"
- "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
- "pextrh %[temp], %[temp], %[zero] \n\t"
- "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[zero] \n\t"
- "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[one] \n\t"
- "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
- "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
- "or %[b_vec], %[b_vec], %[temp] \n\t"
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [lmove1]"f"(0x18), [rmove1]"f"(0x8),
- [one]"f"(0x1)
- : "memory"
- );
-}
-
-void NV12ToRGB565Row_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_rgb565,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psubb %[y], %[eight], %[three] \n\t"//5
- "psllw %[r_vec], %[r_vec], %[y] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psubb %[y], %[eight], %[three] \n\t"//5
- "psllw %[r_vec], %[r_vec], %[y] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [dst_rgb565]"r"(dst_rgb565),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [seven]"f"(0x7)
- : "memory"
- );
-}
-
-void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t"
- "psrlh %[temp], %[y], %[eight] \n\t"
- "pshufh %[u], %[temp], %[ushu] \n\t"
- "pshufh %[v], %[temp], %[vshu] \n\t"
-
- "psrlh %[temp], %[mask1], %[eight] \n\t"
- "and %[y], %[y], %[temp] \n\t"
- "psllh %[temp], %[y], %[eight] \n\t"
- "or %[y], %[y], %[temp] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [eight]"f"(0x8)
- : "memory"
- );
-}
-
-void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t"
- "psrlh %[temp], %[mask1], %[eight] \n\t"
- "and %[temp], %[y], %[temp] \n\t"
- "pshufh %[u], %[temp], %[ushu] \n\t"
- "pshufh %[v], %[temp], %[vshu] \n\t"
-
- "psrlh %[y], %[y], %[eight] \n\t"
- "psllh %[temp], %[y], %[eight] \n\t"
- "or %[y], %[y], %[temp] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [eight]"f"(0x8)
- : "memory"
- );
-}
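
YUY2 and UYVY are the two byte orders of packed 4:2:2; these kernels separate luma from chroma with psrlh/and masks rather than byte loads. The layouts as a sketch, with UnpackYUY2Sketch an illustrative name:

/* YUY2: Y0 U Y1 V (luma in even bytes); UYVY: U Y0 V Y1 (luma in odd
 * bytes). One 4-byte group covers two pixels sharing a U/V pair. */
static void UnpackYUY2Sketch(const uint8_t p[4], uint8_t y[2],
                             uint8_t* u, uint8_t* v) {
  y[0] = p[0];
  *u = p[1];
  y[1] = p[2];
  *v = p[3];
}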
-
-void I422ToRGBARow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t"
- "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t"
- "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [alpha]"f"(-1)
- : "memory"
- );
-}
-
-void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
- __asm__ volatile (
- "punpcklwd %[v32], %[v32], %[v32] \n\t"
- "1: \n\t"
- "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t"
-
- "daddi %[width], %[width], -0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "bnez %[width], 1b \n\t"
- : [v32]"+&f"(v32)
- : [dst_ptr]"r"(dst_argb), [width]"r"(width)
- : "memory"
- );
-}
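
ARGBSetRow just splats one 32-bit value across the row; the MMI loop writes 4 pixels per iteration and so assumes width is a multiple of 4. A scalar equivalent without that restriction (ARGBSetRowSketch is an illustrative name):

static void ARGBSetRowSketch(uint8_t* dst_argb, uint32_t v32, int width) {
  uint32_t* dst = (uint32_t*)dst_argb; /* dst_argb is 4-byte aligned */
  for (int i = 0; i < width; ++i) {
    dst[i] = v32;
  }
}
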
-// clang-format on
-
-// 10 bit YUV to ARGB
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/scale_mmi.cc b/files/source/scale_mmi.cc
deleted file mode 100644
index 1226ef3e..00000000
--- a/files/source/scale_mmi.cc
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// clang-format off
-
-// CPU agnostic row functions
-void ScaleRowDown2_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
- const uint64_t shift = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlh %[src0], %[src0], %[shift] \n\t"
-
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlh %[src1], %[src1], %[shift] \n\t"
-
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift)
- : "memory");
-}
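
ScaleRowDown2 halves a row by keeping the second pixel of each pair, which is exactly what the psrlh-by-8 plus packushb selects. As a scalar sketch (illustrative name):

static void ScaleRowDown2Sketch(const uint8_t* src, uint8_t* dst,
                                int dst_width) {
  for (int i = 0; i < dst_width; ++i) {
    dst[i] = src[2 * i + 1]; /* odd source pixel survives */
  }
}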
-
-void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest0, dest1;
-
- const uint64_t mask = 0x00ff00ff00ff00ffULL;
- const uint64_t shift = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "and %[dest0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "and %[dest1], %[src1], %[mask] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
-
- "psrlh %[src0], %[src0], %[shift] \n\t"
- "psrlh %[src1], %[src1], %[shift] \n\t"
- "packushb %[dest1], %[src0], %[src1] \n\t"
-
- "pavgb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [mask] "f"(mask),
- [shift] "f"(shift), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- const uint8_t* s = src_ptr;
- const uint8_t* t = src_ptr + src_stride;
-
- uint64_t s0, s1, t0, t1;
- uint64_t dest, dest0, dest1;
-
- const uint64_t ph = 0x0002000200020002ULL;
- const uint64_t mask = 0x00ff00ff00ff00ffULL;
- const uint64_t shift0 = 0x2ULL;
- const uint64_t shift1 = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "psrlh %[s1], %[s0], %[shift1] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "psrlh %[t1], %[t0], %[shift1] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddh %[dest0], %[s0], %[s1] \n\t"
- "paddh %[dest0], %[dest0], %[t0] \n\t"
- "paddh %[dest0], %[dest0], %[t1] \n\t"
- "paddh %[dest0], %[dest0], %[ph] \n\t"
- "psrlh %[dest0], %[dest0], %[shift0] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "psrlh %[s1], %[s0], %[shift1] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "psrlh %[t1], %[t0], %[shift1] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddh %[dest1], %[s0], %[s1] \n\t"
- "paddh %[dest1], %[dest1], %[t0] \n\t"
- "paddh %[dest1], %[dest1], %[t1] \n\t"
- "paddh %[dest1], %[dest1], %[ph] \n\t"
- "psrlh %[dest1], %[dest1], %[shift0] \n\t"
-
- "packushb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph),
- [mask] "f"(mask)
- : "memory");
-}
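
The Box variant averages a full 2x2 block with round-to-nearest: the ph constant adds a bias of 2 before the shift by 2. Scalar sketch, where s is the current row and t the row src_stride below it:

static void ScaleRowDown2BoxSketch(const uint8_t* s, const uint8_t* t,
                                   uint8_t* dst, int dst_width) {
  for (int i = 0; i < dst_width; ++i) {
    dst[i] = (uint8_t)((s[2 * i] + s[2 * i + 1] +
                        t[2 * i] + t[2 * i + 1] + 2) >> 2);
  }
}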
-
-void ScaleARGBRowDown2_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- const uint32_t* src = (const uint32_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
-
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpckhwd %[dest], %[src0], %[src1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width)
- : "memory");
-}
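
For ARGB the same down-by-2 decimation works on whole 32-bit pixels: punpckhwd keeps dwords 1 and 3 of each 16-byte load. Sketch (illustrative name):

static void ScaleARGBRowDown2Sketch(const uint32_t* src, uint32_t* dst,
                                    int dst_width) {
  for (int i = 0; i < dst_width; ++i) {
    dst[i] = src[2 * i + 1]; /* second pixel of each pair */
  }
}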
-
-void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "lwc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "punpcklwd %[dest_lo], %[src0], %[src1] \n\t"
- "lwc1 %[src0], 0x04(%[src_ptr]) \n\t"
- "lwc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "punpcklwd %[dest_hi], %[src0], %[src1] \n\t"
-
- "pavgb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- const uint8_t* s = src_argb;
- const uint8_t* t = src_argb + src_stride;
-
- uint64_t s0, s_hi, s_lo;
- uint64_t t0, t_hi, t_lo;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t mask = 0x0ULL;
- const uint64_t ph = 0x0002000200020002ULL;
-  const uint64_t shift = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "punpcklbh %[s_lo], %[s0], %[mask] \n\t"
- "punpckhbh %[s_hi], %[s0], %[mask] \n\t"
- "paddh %[dest_lo], %[s_lo], %[s_hi] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "punpcklbh %[t_lo], %[t0], %[mask] \n\t"
- "punpckhbh %[t_hi], %[t0], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[t_hi] \n\t"
-
- "paddh %[dest_lo], %[dest_lo], %[ph] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "punpcklbh %[s_lo], %[s0], %[mask] \n\t"
- "punpckhbh %[s_hi], %[s0], %[mask] \n\t"
- "paddh %[dest_hi], %[s_lo], %[s_hi] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "punpcklbh %[t_lo], %[t0], %[mask] \n\t"
- "punpckhbh %[t_hi], %[t0], %[mask] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[t_hi] \n\t"
-
- "paddh %[dest_hi], %[dest_hi], %[ph] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shfit] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [t0] "=&f"(t0), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo),
- [t_hi] "=&f"(t_hi), [t_lo] "=&f"(t_lo), [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width),
- [mask] "f"(mask), [ph] "f"(ph), [shfit] "f"(shfit)
- : "memory");
-}
-
-void ScaleRowDown2_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
- const uint64_t shift = 0x10ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
-
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
-
- "packsswh %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift)
- : "memory");
-}
-
-void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpcklhw %[dest_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
-
- "punpcklhw %[src0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhhw %[src1], %[dest_lo], %[dest_hi] \n\t"
-
- "pavgh %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* s = src_ptr;
- const uint16_t* t = src_ptr + src_stride;
-
- uint64_t s0, s1, s_hi, s_lo;
- uint64_t t0, t1, t_hi, t_lo;
- uint64_t dest, dest0, dest1;
-
- const uint64_t ph = 0x0000000200000002ULL;
- const uint64_t mask = 0x0000ffff0000ffffULL;
- const uint64_t shift0 = 0x10ULL;
- const uint64_t shift1 = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "psrlw %[s1], %[s0], %[shift0] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "psrlw %[t1], %[t0], %[shift0] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddw %[dest0], %[s0], %[s1] \n\t"
- "paddw %[dest0], %[dest0], %[t0] \n\t"
- "paddw %[dest0], %[dest0], %[t1] \n\t"
- "paddw %[dest0], %[dest0], %[ph] \n\t"
- "psrlw %[dest0], %[dest0], %[shift1] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "psrlw %[s1], %[s0], %[shift0] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "psrlw %[t1], %[t0], %[shift0] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddw %[dest1], %[s0], %[s1] \n\t"
- "paddw %[dest1], %[dest1], %[t0] \n\t"
- "paddw %[dest1], %[dest1], %[t1] \n\t"
- "paddw %[dest1], %[dest1], %[ph] \n\t"
- "psrlw %[dest1], %[dest1], %[shift1] \n\t"
-
- "packsswh %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1),
- [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), [t_hi] "=&f"(t_hi),
- [t_lo] "=&f"(t_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
- [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph),
- [mask] "f"(mask)
- : "memory");
-}
-
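The mask/shift constants above implement a rounded 2x2 box average. A scalar sketch of the arithmetic (hypothetical name):

    #include <stddef.h>
    #include <stdint.h>

    /* dst[x] = (a + b + c + d + 2) >> 2 over each 2x2 block, matching the
     * ph and shift1 constants in the MMI loop above. */
    void ScaleRowDown2Box_16_sketch(const uint16_t* src_ptr,
                                    ptrdiff_t src_stride, uint16_t* dst,
                                    int dst_width) {
      const uint16_t* s = src_ptr;
      const uint16_t* t = src_ptr + src_stride;
      int x;
      for (x = 0; x < dst_width; ++x) {
        dst[x] = (uint16_t)(
            (s[2 * x] + s[2 * x + 1] + t[2 * x] + t[2 * x + 1] + 2) >> 2);
      }
    }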
-void ScaleRowDown4_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t shift = 0x10ULL;
- const uint64_t mask = 0x000000ff000000ffULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
- "and %[src0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
- "and %[src1], %[src1], %[mask] \n\t"
- "packsswh %[dest_lo], %[src0], %[src1] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
- "and %[src0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
- "and %[src1], %[src1], %[mask] \n\t"
- "packsswh %[dest_hi], %[src0], %[src1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift), [mask] "f"(mask)
- : "memory");
-}
-
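Shifting each 32-bit lane right by 16 and masking the low byte keeps the third byte of every group of four, so the loop above is a 4x point sample. Scalar sketch (hypothetical name):

    #include <stdint.h>

    void ScaleRowDown4_sketch(const uint8_t* src_ptr, uint8_t* dst,
                              int dst_width) {
      int x;
      for (x = 0; x < dst_width; ++x) {
        dst[x] = src_ptr[4 * x + 2]; /* byte 2 of each 4-byte group */
      }
    }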
-void ScaleRowDown4_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpckhhw %[dest_lo], %[src0], %[src1] \n\t"
- "punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
- "punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
- "punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t"
-
-      "packsswh   %[dest],         %[dest_lo],       %[dest_hi]    \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [mask] "f"(mask)
- : "memory");
-}
-
-#define DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
-
-#define DO_SCALEROWDOWN4BOX_LOOP(reg) \
- "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \
- "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \
- \
- "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "pmaddhw %[dest_lo], %[dest_lo], %[mask1] \n\t" \
- "pmaddhw %[dest_hi], %[dest_hi], %[mask1] \n\t" \
- "packsswh " #reg ", %[dest_lo], %[dest_hi] \n\t" \
- "pmaddhw " #reg ", " #reg ", %[mask1] \n\t" \
- "paddh " #reg ", " #reg ", %[ph] \n\t" \
- "psrlh " #reg ", " #reg ", %[shift] \n\t" \
- \
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \
- "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t"
-
-/* LibYUVScaleTest.ScaleDownBy4_Box */
-void ScaleRowDown4Box_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- const uint8_t* src0_ptr = src_ptr;
- const uint8_t* src1_ptr = src_ptr + src_stride;
- const uint8_t* src2_ptr = src_ptr + src_stride * 2;
- const uint8_t* src3_ptr = src_ptr + src_stride * 3;
-
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3;
-
- const uint64_t mask0 = 0x0ULL;
- const uint64_t mask1 = 0x0001000100010001ULL;
- const uint64_t ph = 0x0008000800080008ULL;
- const uint64_t shift = 0x4ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- DO_SCALEROWDOWN4BOX_LOOP(%[dest0])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest1])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest2])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest3])
-
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[dest_hi], %[dest2], %[dest3] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0),
- [ph] "f"(ph), [mask1] "f"(mask1)
- : "memory");
-}
-
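The macro pair above sums a 4x4 byte neighborhood per output pixel and rounds with +8 >> 4. An equivalent scalar sketch (hypothetical name):

    #include <stddef.h>
    #include <stdint.h>

    void ScaleRowDown4Box_sketch(const uint8_t* src_ptr, ptrdiff_t src_stride,
                                 uint8_t* dst, int dst_width) {
      int x, i, j;
      for (x = 0; x < dst_width; ++x) {
        int sum = 8; /* rounding term, matching the ph constant */
        for (j = 0; j < 4; ++j) {
          for (i = 0; i < 4; ++i) {
            sum += src_ptr[j * src_stride + 4 * x + i];
          }
        }
        dst[x] = (uint8_t)(sum >> 4); /* divide by 16 */
      }
    }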
-#define DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
-
-#define DO_SCALEROWDOWN4BOX_16_LOOP(reg) \
- "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \
- "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \
- \
- "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "paddw %[dest], %[dest_lo], %[dest_hi] \n\t" \
- "punpckhwd %[dest_hi], %[dest], %[dest] \n\t" \
- "paddw %[dest], %[dest_hi], %[dest] \n\t" \
- "paddw %[dest], %[dest], %[ph] \n\t" \
- "psraw %[dest], %[dest], %[shift] \n\t" \
- "and " #reg ", %[dest], %[mask1] \n\t" \
- \
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \
- "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t"
-
-/* LibYUVScaleTest.ScaleDownBy4_Box_16 */
-void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* src0_ptr = src_ptr;
- const uint16_t* src1_ptr = src_ptr + src_stride;
- const uint16_t* src2_ptr = src_ptr + src_stride * 2;
- const uint16_t* src3_ptr = src_ptr + src_stride * 3;
-
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3;
-
- const uint64_t mask0 = 0x0ULL;
- const uint64_t mask1 = 0x00000000ffffffffULL;
- const uint64_t ph = 0x0000000800000008ULL;
- const uint64_t shift = 0x04ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest0])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest1])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest2])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest3])
- "punpcklwd %[dest_lo], %[dest0], %[dest1] \n\t"
- "punpcklwd %[dest_hi], %[dest2], %[dest3] \n\t"
-
-      "packsswh    %[dest],        %[dest_lo],     %[dest_hi]       \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0),
- [ph] "f"(ph), [mask1] "f"(mask1)
- : "memory");
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_MMI(uint8_t* dst_ptr,
- const uint8_t* src_ptr,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest;
-
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width)
- : "memory");
-}
-
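punpcklbh with identical operands duplicates each byte. Scalar sketch (hypothetical name):

    #include <stdint.h>

    void ScaleColsUp2_sketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                             int dst_width) {
      int x;
      for (x = 0; x < dst_width / 2; ++x) {
        dst_ptr[2 * x] = dst_ptr[2 * x + 1] = src_ptr[x];
      }
    }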
-void ScaleColsUp2_16_MMI(uint16_t* dst_ptr,
- const uint16_t* src_ptr,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest;
-
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-
- "punpcklhw %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "punpckhhw %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
- uint64_t src, src_hi, src_lo, dest0, dest1;
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask] \n\t"
-
- "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "paddush %[dest0], %[dest0], %[src_lo] \n\t"
- "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "paddush %[dest1], %[dest1], %[src_hi] \n\t"
-
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [src] "=&f"(src)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width),
- [mask] "f"(mask)
- : "memory");
-}
-
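paddush is a saturating unsigned 16-bit add, so the accumulator clamps at 0xffff rather than wrapping. Scalar sketch (hypothetical name):

    #include <stdint.h>

    void ScaleAddRow_sketch(const uint8_t* src_ptr, uint16_t* dst_ptr,
                            int src_width) {
      int x;
      for (x = 0; x < src_width; ++x) {
        uint32_t sum = (uint32_t)dst_ptr[x] + src_ptr[x];
        dst_ptr[x] = (uint16_t)(sum > 0xffff ? 0xffff : sum); /* saturate */
      }
    }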
-void ScaleAddRow_16_MMI(const uint16_t* src_ptr,
- uint32_t* dst_ptr,
- int src_width) {
- uint64_t src, src_hi, src_lo, dest0, dest1;
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "punpcklhw %[src_lo], %[src], %[mask] \n\t"
- "punpckhhw %[src_hi], %[src], %[mask] \n\t"
-
- "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "paddw %[dest0], %[dest0], %[src_lo] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
-
- "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "paddw %[dest1], %[dest1], %[src_hi] \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [src] "=&f"(src)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width),
- [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t"
- "lwc1 %[src1], 0x00(%[src_ptr]) \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb),
- [src_stepx_4] "r"(src_stepx << 2), [width] "r"(dst_width)
- : "memory");
-}
-
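Each iteration copies one whole 4-byte ARGB pixel, then advances the source by src_stepx pixels (src_stepx << 2 bytes). Scalar sketch (hypothetical name; assumes suitably aligned pointers):

    #include <stdint.h>

    void ScaleARGBRowDownEven_sketch(const uint8_t* src_argb, int src_stepx,
                                     uint8_t* dst_argb, int dst_width) {
      const uint32_t* src = (const uint32_t*)src_argb;
      uint32_t* dst = (uint32_t*)dst_argb;
      int x;
      for (x = 0; x < dst_width; ++x) {
        dst[x] = src[x * src_stepx];
      }
    }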
-void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- const uint8_t* src0_ptr = src_argb;
- const uint8_t* src1_ptr = src_argb + src_stride;
-
- uint64_t src0, src1, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1;
-
- const uint64_t mask = 0x0ULL;
- const uint64_t ph = 0x0002000200020002ULL;
- const uint64_t shift = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[mask] \n\t"
- "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src0], %[mask] \n\t"
-
- "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src1], %[mask] \n\t"
- "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t"
- "punpcklbh %[src_hi], %[src1], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
- "paddh %[dest0], %[dest_hi], %[dest_lo] \n\t"
- "paddh %[dest0], %[dest0], %[ph] \n\t"
- "psrlh %[dest0], %[dest0], %[shift] \n\t"
-
- "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t"
- "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t"
-
- "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[mask] \n\t"
- "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src0], %[mask] \n\t"
-
- "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src1], %[mask] \n\t"
- "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t"
- "punpcklbh %[src_hi], %[src1], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
- "paddh %[dest1], %[dest_hi], %[dest_lo] \n\t"
- "paddh %[dest1], %[dest1], %[ph] \n\t"
- "psrlh %[dest1], %[dest1], %[shift] \n\t"
-
- "packushb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t"
- "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
- [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [dst_ptr] "r"(dst_argb), [width] "r"(dst_width),
- [src_stepx_4] "r"(src_stepx << 2), [shift] "f"(shift), [mask] "f"(mask),
- [ph] "f"(ph)
- : "memory");
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_MMI(uint8_t* dst_argb,
- const uint8_t* src_argb,
- int dst_width,
- int x,
- int dx) {
- const uint32_t* src = (const uint32_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
-
- const uint32_t* src_tmp;
-
- uint64_t dest, offset;
-
- const uint64_t shift0 = 16;
- const uint64_t shift1 = 2;
-
- __asm__ volatile(
- "1: \n\t"
- "srav %[offset], %[x], %[shift0] \n\t"
- "sllv %[offset], %[offset], %[shift1] \n\t"
- "dadd %[src_tmp], %[src_ptr], %[offset] \n\t"
- "lwc1 %[dest], 0x00(%[src_tmp]) \n\t"
- "swc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[x], %[x], %[dx] \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x04 \n\t"
- "daddi %[width], %[width], -0x01 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest] "=&f"(dest), [offset] "=&r"(offset), [src_tmp] "=&r"(src_tmp)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [dx] "r"(dx), [x] "r"(x), [shift0] "r"(shift0), [shift1] "r"(shift1)
- : "memory");
-}
-
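The srav/sllv pair turns the 16.16 fixed-point x coordinate into a pixel index (x >> 16) and then into a byte offset (<< 2). Scalar sketch (hypothetical name):

    #include <stdint.h>

    void ScaleARGBCols_sketch(uint8_t* dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) {
      const uint32_t* src = (const uint32_t*)src_argb;
      uint32_t* dst = (uint32_t*)dst_argb;
      int j;
      for (j = 0; j < dst_width; ++j) {
        dst[j] = src[x >> 16]; /* integer part of the 16.16 coordinate */
        x += dx;
      }
    }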
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
- const uint8_t* src_argb,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest0, dest1;
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "punpcklwd %[dest0], %[src], %[src] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "punpckhwd %[dest1], %[src], %[src] \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src] "=&f"(src)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width)
- : "memory");
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-/* LibYUVBaseTest.TestFixedDiv */
-int FixedDiv_MIPS(int num, int div) {
- int quotient = 0;
- const int shift = 16;
-
- asm(
- "dsll %[num], %[num], %[shift] \n\t"
-      "ddiv %[num], %[div] \n\t"
-      "mflo %[quo] \n\t"
-      : [quo] "+&r"(quotient), [num] "+&r"(num)
-      : [div] "r"(div), [shift] "r"(shift)
-      : "hi", "lo");
-
- return quotient;
-}
-
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-/* LibYUVScaleTest.ARGBScaleTo320x240_Linear */
-int FixedDiv1_MIPS(int num, int div) {
- int quotient = 0;
- const int shift = 16;
- const int val1 = 1;
-  const int64_t val11 = 0x00010001LL;
-
- asm(
- "dsll %[num], %[num], %[shift] \n\t"
- "dsub %[num], %[num], %[val11] \n\t"
- "dsub %[div], %[div], %[val1] \n\t"
-      "ddiv %[num], %[div] \n\t"
-      "mflo %[quo] \n\t"
-      : [quo] "+&r"(quotient), [num] "+&r"(num), [div] "+&r"(div)
-      : [val1] "r"(val1), [val11] "r"(val11), [shift] "r"(shift)
-      : "hi", "lo");
-
- return quotient;
-}
-
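In portable C, the arithmetic the two routines above perform is simply the following (a hedged sketch; names are illustrative):

    #include <stdint.h>

    static int FixedDiv_sketch(int num, int div) {
      return (int)(((int64_t)num << 16) / div);
    }

    /* Biases both operands down before dividing, as FixedDiv1_MIPS does
     * with val11 and val1. */
    static int FixedDiv1_sketch(int num, int div) {
      return (int)((((int64_t)num << 16) - 0x00010001) / (div - 1));
    }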
-// Read 8x2 upsample with filtering and write 16x1.
-// Actually reads an extra pixel per row, so 9x2.
-void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* src2_ptr = src_ptr + src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest04, dest15, dest26, dest37;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- const uint64_t mask0 = 0x0003000900030009ULL;
- const uint64_t mask1 = 0x0001000300010003ULL;
- const uint64_t mask2 = 0x0009000300090003ULL;
- const uint64_t mask3 = 0x0003000100030001ULL;
- const uint64_t ph = 0x0000000800000008ULL;
- const uint64_t shift = 4;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t"
- "pmaddhw %[dest04], %[src0], %[mask0] \n\t"
- "gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t"
- "pmaddhw %[dest], %[src1], %[mask1] \n\t"
- "paddw %[dest04], %[dest04], %[dest] \n\t"
- "paddw %[dest04], %[dest04], %[ph] \n\t"
- "psrlw %[dest04], %[dest04], %[shift] \n\t"
-
- "pmaddhw %[dest15], %[src0], %[mask2] \n\t"
- "pmaddhw %[dest], %[src1], %[mask3] \n\t"
- "paddw %[dest15], %[dest15], %[dest] \n\t"
- "paddw %[dest15], %[dest15], %[ph] \n\t"
- "psrlw %[dest15], %[dest15], %[shift] \n\t"
-
- "gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t"
- "gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t"
- "pmaddhw %[dest26], %[src0], %[mask0] \n\t"
- "gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t"
- "gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t"
- "pmaddhw %[dest], %[src1], %[mask1] \n\t"
- "paddw %[dest26], %[dest26], %[dest] \n\t"
- "paddw %[dest26], %[dest26], %[ph] \n\t"
- "psrlw %[dest26], %[dest26], %[shift] \n\t"
-
- "pmaddhw %[dest37], %[src0], %[mask2] \n\t"
- "pmaddhw %[dest], %[src1], %[mask3] \n\t"
- "paddw %[dest37], %[dest37], %[dest] \n\t"
- "paddw %[dest37], %[dest37], %[ph] \n\t"
- "psrlw %[dest37], %[dest37], %[shift] \n\t"
-
- /* tmp0 = ( 00 04 02 06 ) */
- "packsswh %[tmp0], %[dest04], %[dest26] \n\t"
- /* tmp1 = ( 01 05 03 07 ) */
- "packsswh %[tmp1], %[dest15], %[dest37] \n\t"
-
- /* tmp2 = ( 00 01 04 05 )*/
- "punpcklhw %[tmp2], %[tmp0], %[tmp1] \n\t"
- /* tmp3 = ( 02 03 06 07 )*/
- "punpckhhw %[tmp3], %[tmp0], %[tmp1] \n\t"
-
- /* ( 00 01 02 03 ) */
- "punpcklwd %[dest], %[tmp2], %[tmp3] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- /* ( 04 05 06 07 ) */
- "punpckhwd %[dest], %[tmp2], %[tmp3] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest04] "=&f"(dest04),
- [dest15] "=&f"(dest15), [dest26] "=&f"(dest26), [dest37] "=&f"(dest37),
- [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [dest] "=&f"(dest)
- : [src1_ptr] "r"(src_ptr), [src2_ptr] "r"(src2_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [shift] "f"(shift), [ph] "f"(ph)
- : "memory");
-}
-
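The mask constants encode the classic (9,3,3,1)/16 bilinear upsample weights for pmaddhw, mirrored between even and odd outputs. Scalar sketch (hypothetical name); the s[x + 1]/t[x + 1] reads are the extra pixel noted above:

    #include <stddef.h>
    #include <stdint.h>

    void ScaleRowUp2_16_sketch(const uint16_t* src_ptr, ptrdiff_t src_stride,
                               uint16_t* dst, int dst_width) {
      const uint16_t* s = src_ptr;              /* nearer source row */
      const uint16_t* t = src_ptr + src_stride; /* farther source row */
      int x;
      for (x = 0; x < dst_width / 2; ++x) {
        dst[2 * x] = (uint16_t)(
            (9 * s[x] + 3 * s[x + 1] + 3 * t[x] + t[x + 1] + 8) >> 4);
        dst[2 * x + 1] = (uint16_t)(
            (3 * s[x] + 9 * s[x + 1] + t[x] + 3 * t[x + 1] + 8) >> 4);
      }
    }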
-void ScaleRowDown34_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- uint64_t src[2];
- uint64_t tmp[2];
- __asm__ volatile (
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "and %[tmp1], %[src0], %[mask1] \n\t"
- "psrlw %[tmp0], %[src0], %[rmov] \n\t"
- "psllw %[tmp0], %[tmp0], %[lmov1] \n\t"
- "or %[src0], %[tmp0], %[tmp1] \n\t"
- "punpckhwd %[tmp0], %[src0], %[src0] \n\t"
- "psllw %[tmp1], %[tmp0], %[rmov] \n\t"
- "or %[src0], %[src0], %[tmp1] \n\t"
- "psrlw %[tmp0], %[tmp0], %[rmov8] \n\t"
- "pextrh %[tmp0], %[tmp0], %[zero] \n\t"
- "pinsrh_2 %[src0], %[src0], %[tmp0] \n\t"
- "pextrh %[tmp0], %[src1], %[zero] \n\t"
- "pinsrh_3 %[src0], %[src0], %[tmp0] \n\t"
-
- "punpckhwd %[tmp0], %[src1], %[src1] \n\t"
- "pextrh %[tmp1], %[tmp0], %[zero] \n\t"
- "psrlw %[src1], %[src1], %[rmov] \n\t"
- "psllw %[tmp1], %[tmp1], %[rmov8] \n\t"
- "or %[src1], %[src1], %[tmp1] \n\t"
- "and %[tmp0], %[tmp0], %[mask2] \n\t"
- "or %[src1], %[src1], %[tmp0] \n\t"
-
- "gssdlc1 %[src0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[src0], 0x00(%[dst_ptr]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_ptr]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
- "bnez %[width], 1b \n\t"
-
- : [src0]"=&f"(src[0]), [src1]"=&f"(src[1]),
- [tmp0]"=&f"(tmp[0]), [tmp1]"=&f"(tmp[1])
- : [src_ptr]"r"(src_ptr), [dst_ptr]"r"(dst),
- [lmov]"f"(0xc), [rmov]"f"(0x18),
- [mask1]"f"(0xffff0000ffff), [rmov8]"f"(0x8),
- [zero]"f"(0x0), [mask2]"f"(0xff000000),
- [width]"r"(dst_width), [lmov1]"f"(0x10)
- : "memory"
- );
-}
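The shuffling above implements 3/4 horizontal point sampling: of every four source bytes, bytes 0, 1 and 3 survive (12 outputs per 16 inputs). Scalar sketch (hypothetical name):

    #include <stdint.h>

    void ScaleRowDown34_sketch(const uint8_t* src_ptr, uint8_t* dst,
                               int dst_width) {
      int x;
      for (x = 0; x < dst_width; x += 3) {
        dst[0] = src_ptr[0];
        dst[1] = src_ptr[1];
        dst[2] = src_ptr[3];
        dst += 3;
        src_ptr += 4;
      }
    }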
-// clang-format on
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/files/tools_libyuv/autoroller/roll_deps.py
deleted file mode 100755
index 977c86de..00000000
--- a/files/tools_libyuv/autoroller/roll_deps.py
+++ /dev/null
@@ -1,509 +0,0 @@
-#!/usr/bin/env vpython3
-
-# Copyright 2017 The LibYuv Project Authors. All rights reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-# This is a modified copy of the script in
-# https://webrtc.googlesource.com/src/+/master/tools_webrtc/autoroller/roll_deps.py
-# customized for libyuv.
-
-"""Script to automatically roll dependencies in the libyuv DEPS file."""
-
-import argparse
-import base64
-import collections
-import logging
-import os
-import re
-import subprocess
-import sys
-import urllib.request
-
-
-# Skip these dependencies (list without solution name prefix).
-DONT_AUTOROLL_THESE = [
- 'src/third_party/gflags/src',
-]
-
-LIBYUV_URL = 'https://chromium.googlesource.com/libyuv/libyuv'
-CHROMIUM_SRC_URL = 'https://chromium.googlesource.com/chromium/src'
-CHROMIUM_COMMIT_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s'
-CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s'
-CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s'
-
-COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$')
-CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([0-9a-z-]+)\'$')
-ROLL_BRANCH_NAME = 'roll_chromium_revision'
-
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-CHECKOUT_SRC_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.pardir,
- os.pardir))
-CHECKOUT_ROOT_DIR = os.path.realpath(os.path.join(CHECKOUT_SRC_DIR, os.pardir))
-
-sys.path.append(os.path.join(CHECKOUT_SRC_DIR, 'build'))
-import find_depot_tools # pylint: disable=wrong-import-position
-find_depot_tools.add_depot_tools_to_path()
-
-CLANG_UPDATE_SCRIPT_URL_PATH = 'tools/clang/scripts/update.py'
-CLANG_UPDATE_SCRIPT_LOCAL_PATH = os.path.join(CHECKOUT_SRC_DIR, 'tools',
- 'clang', 'scripts', 'update.py')
-
-DepsEntry = collections.namedtuple('DepsEntry', 'path url revision')
-ChangedDep = collections.namedtuple('ChangedDep',
- 'path url current_rev new_rev')
-
-class RollError(Exception):
- pass
-
-
-def VarLookup(local_scope):
- return lambda var_name: local_scope['vars'][var_name]
-
-
-def ParseDepsDict(deps_content):
- local_scope = {}
- global_scope = {
- 'Var': VarLookup(local_scope),
- 'Str': lambda s: s,
- 'deps_os': {},
- }
- exec(deps_content, global_scope, local_scope)
- return local_scope
-
-
-def ParseLocalDepsFile(filename):
- with open(filename, 'rb') as f:
- deps_content = f.read().decode('utf-8')
- return ParseDepsDict(deps_content)
-
-
-def ParseRemoteCrDepsFile(revision):
- deps_content = ReadRemoteCrFile('DEPS', revision)
- return ParseDepsDict(deps_content)
-
-
-def ParseCommitPosition(commit_message):
- for line in reversed(commit_message.splitlines()):
- m = COMMIT_POSITION_RE.match(line.strip())
- if m:
- return int(m.group(1))
- logging.error('Failed to parse commit position id from:\n%s\n',
- commit_message)
- sys.exit(-1)
-
-
-def _RunCommand(command, working_dir=None, ignore_exit_code=False,
- extra_env=None, input_data=None):
- """Runs a command and returns the output from that command.
-
- If the command fails (exit code != 0), the function will exit the process.
-
- Returns:
- A tuple containing the stdout and stderr outputs as strings.
- """
- working_dir = working_dir or CHECKOUT_SRC_DIR
- logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir)
- env = os.environ.copy()
- if extra_env:
- assert all(isinstance(value, str) for value in extra_env.values())
- logging.debug('extra env: %s', extra_env)
- env.update(extra_env)
- p = subprocess.Popen(command,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=env,
- cwd=working_dir,
- universal_newlines=True)
- std_output, err_output = p.communicate(input_data)
- p.stdout.close()
- p.stderr.close()
- if not ignore_exit_code and p.returncode != 0:
- logging.error('Command failed: %s\n'
- 'stdout:\n%s\n'
- 'stderr:\n%s\n', ' '.join(command), std_output, err_output)
- sys.exit(p.returncode)
- return std_output, err_output
-
-
-def _GetBranches():
-  """Returns a tuple (active, branches).
-
- The 'active' is the name of the currently active branch and 'branches' is a
- list of all branches.
- """
- lines = _RunCommand(['git', 'branch'])[0].split('\n')
- branches = []
- active = ''
- for line in lines:
- if '*' in line:
- # The assumption is that the first char will always be the '*'.
- active = line[1:].strip()
- branches.append(active)
- else:
- branch = line.strip()
- if branch:
- branches.append(branch)
- return active, branches
-
-
-def _ReadGitilesContent(url):
- # Download and decode BASE64 content until
- # https://code.google.com/p/gitiles/issues/detail?id=7 is fixed.
- base64_content = ReadUrlContent(url + '?format=TEXT')
- return base64.b64decode(base64_content[0]).decode('utf-8')
-
-
-def ReadRemoteCrFile(path_below_src, revision):
- """Reads a remote Chromium file of a specific revision. Returns a string."""
- return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE % (revision,
- path_below_src))
-
-
-def ReadRemoteCrCommit(revision):
- """Reads a remote Chromium commit message. Returns a string."""
- return _ReadGitilesContent(CHROMIUM_COMMIT_TEMPLATE % revision)
-
-
-def ReadUrlContent(url):
-  """Connects to a URL and reads the contents. Returns a list of lines."""
- conn = urllib.request.urlopen(url)
- try:
- return conn.readlines()
- except IOError as e:
- logging.exception('Error connecting to %s. Error: %s', url, e)
- raise
- finally:
- conn.close()
-
-
-def GetMatchingDepsEntries(depsentry_dict, dir_path):
- """Gets all deps entries matching the provided path.
-
- This list may contain more than one DepsEntry object.
- Example: dir_path='src/testing' would give results containing both
- 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's DEPS.
- Example 2: dir_path='src/build' should return 'src/build' but not
- 'src/buildtools'.
-
- Returns:
- A list of DepsEntry objects.
- """
- result = []
- for path, depsentry in depsentry_dict.items():
- if path == dir_path:
- result.append(depsentry)
- else:
- parts = path.split('/')
- if all(part == parts[i]
- for i, part in enumerate(dir_path.split('/'))):
- result.append(depsentry)
- return result
-
-def BuildDepsentryDict(deps_dict):
- """Builds a dict of paths to DepsEntry objects from a raw deps dict."""
- result = {}
-
- def AddDepsEntries(deps_subdict):
- for path, deps_url_spec in deps_subdict.items():
- if isinstance(deps_url_spec, dict):
- if deps_url_spec.get('dep_type') == 'cipd':
- continue
- deps_url = deps_url_spec['url']
- else:
- deps_url = deps_url_spec
- if not path in result:
- url, revision = deps_url.split('@') if deps_url else (None, None)
- result[path] = DepsEntry(path, url, revision)
-
- AddDepsEntries(deps_dict['deps'])
- for deps_os in ['win', 'mac', 'linux', 'android', 'ios', 'unix']:
- AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {}))
- return result
-
-
-def CalculateChangedDeps(libyuv_deps, new_cr_deps):
-  """Calculate changed deps entries based on the libyuv DEPS file:
-  - If the dependency is shared with the Chromium DEPS file, roll it to the
-    same revision as Chromium (i.e. the entry in the new_cr_deps dict).
-  - If it's a Chromium sub-directory, roll it to the HEAD revision (note that
-    this means it may be ahead of the chromium_revision, but generally these
-    should be close).
-  - If it's another DEPS entry (not shared with Chromium), roll it to HEAD
-    unless it's configured to be skipped.
-
-  Returns:
-    A list of ChangedDep objects representing the changed deps.
-  """
- result = []
- libyuv_entries = BuildDepsentryDict(libyuv_deps)
- new_cr_entries = BuildDepsentryDict(new_cr_deps)
- for path, libyuv_deps_entry in libyuv_entries.items():
- if path in DONT_AUTOROLL_THESE:
- continue
- cr_deps_entry = new_cr_entries.get(path)
- if cr_deps_entry:
- # Use the revision from Chromium's DEPS file.
- new_rev = cr_deps_entry.revision
- assert libyuv_deps_entry.url == cr_deps_entry.url, (
- 'Libyuv DEPS entry %s has a different URL (%s) than Chromium (%s).' %
- (path, libyuv_deps_entry.url, cr_deps_entry.url))
- else:
- # Use the HEAD of the deps repo.
- stdout, _ = _RunCommand(['git', 'ls-remote', libyuv_deps_entry.url,
- 'HEAD'])
- new_rev = stdout.strip().split('\t')[0]
-
- # Check if an update is necessary.
- if libyuv_deps_entry.revision != new_rev:
- logging.debug('Roll dependency %s to %s', path, new_rev)
- result.append(ChangedDep(path, libyuv_deps_entry.url,
- libyuv_deps_entry.revision, new_rev))
- return sorted(result)
-
-
-def CalculateChangedClang(new_cr_rev):
- def GetClangRev(lines):
- for line in lines:
- match = CLANG_REVISION_RE.match(line)
- if match:
- return match.group(1)
-    raise RollError('Could not parse Clang revision from:\n' +
-                    '\n'.join(' ' + l for l in lines))
-
- with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f:
- current_lines = f.readlines()
- current_rev = GetClangRev(current_lines)
-
- new_clang_update_py = ReadRemoteCrFile(CLANG_UPDATE_SCRIPT_URL_PATH,
- new_cr_rev).splitlines()
- new_rev = GetClangRev(new_clang_update_py)
- return ChangedDep(CLANG_UPDATE_SCRIPT_LOCAL_PATH, None, current_rev, new_rev)
-
-
-def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos,
- new_commit_pos, changed_deps_list, clang_change):
- current_cr_rev = current_cr_rev[0:10]
- new_cr_rev = new_cr_rev[0:10]
- rev_interval = '%s..%s' % (current_cr_rev, new_cr_rev)
- git_number_interval = '%s:%s' % (current_commit_pos, new_commit_pos)
-
- commit_msg = ['Roll chromium_revision %s (%s)\n' % (rev_interval,
- git_number_interval)]
- commit_msg.append('Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval))
- commit_msg.append('Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE %
- rev_interval))
- if changed_deps_list:
- commit_msg.append('Changed dependencies:')
-
- for c in changed_deps_list:
- commit_msg.append('* %s: %s/+log/%s..%s' % (c.path, c.url,
- c.current_rev[0:10],
- c.new_rev[0:10]))
- change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, 'DEPS')
- commit_msg.append('DEPS diff: %s\n' % change_url)
- else:
- commit_msg.append('No dependencies changed.')
-
- if clang_change.current_rev != clang_change.new_rev:
- commit_msg.append('Clang version changed %s:%s' %
- (clang_change.current_rev, clang_change.new_rev))
- change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval,
- CLANG_UPDATE_SCRIPT_URL_PATH)
- commit_msg.append('Details: %s\n' % change_url)
- else:
- commit_msg.append('No update to Clang.\n')
-
- # TBR needs to be non-empty for Gerrit to process it.
- git_author = _RunCommand(['git', 'config', 'user.email'],
- working_dir=CHECKOUT_SRC_DIR)[0].strip()
- commit_msg.append('TBR=%s' % git_author)
-
- commit_msg.append('BUG=None')
- return '\n'.join(commit_msg)
-
-
-def UpdateDepsFile(deps_filename, old_cr_revision, new_cr_revision,
- changed_deps):
- """Update the DEPS file with the new revision."""
-
- # Update the chromium_revision variable.
- with open(deps_filename, 'rb') as deps_file:
- deps_content = deps_file.read().decode('utf-8')
- deps_content = deps_content.replace(old_cr_revision, new_cr_revision)
- with open(deps_filename, 'wb') as deps_file:
- deps_file.write(deps_content.encode('utf-8'))
-
- # Update each individual DEPS entry.
- for dep in changed_deps:
- local_dep_dir = os.path.join(CHECKOUT_ROOT_DIR, dep.path)
- if not os.path.isdir(local_dep_dir):
- raise RollError(
- 'Cannot find local directory %s. Make sure the .gclient file\n'
- 'contains all platforms in the target_os list, i.e.\n'
- 'target_os = ["android", "unix", "mac", "ios", "win"];\n'
- 'Then run "gclient sync" again.' % local_dep_dir)
- _RunCommand(
- ['gclient', 'setdep', '--revision', '%s@%s' % (dep.path, dep.new_rev)],
- working_dir=CHECKOUT_SRC_DIR)
-
-
-def _IsTreeClean():
- stdout, _ = _RunCommand(['git', 'status', '--porcelain'])
- if len(stdout) == 0:
- return True
-
- logging.error('Dirty/unversioned files:\n%s', stdout)
- return False
-
-
-def _EnsureUpdatedMainBranch(dry_run):
- current_branch = _RunCommand(
- ['git', 'rev-parse', '--abbrev-ref', 'HEAD'])[0].splitlines()[0]
- if current_branch != 'main':
- logging.error('Please checkout the main branch and re-run this script.')
- if not dry_run:
- sys.exit(-1)
-
- logging.info('Updating main branch...')
- _RunCommand(['git', 'pull'])
-
-
-def _CreateRollBranch(dry_run):
- logging.info('Creating roll branch: %s', ROLL_BRANCH_NAME)
- if not dry_run:
- _RunCommand(['git', 'checkout', '-b', ROLL_BRANCH_NAME])
-
-
-def _RemovePreviousRollBranch(dry_run):
- active_branch, branches = _GetBranches()
- if active_branch == ROLL_BRANCH_NAME:
- active_branch = 'main'
- if ROLL_BRANCH_NAME in branches:
- logging.info('Removing previous roll branch (%s)', ROLL_BRANCH_NAME)
- if not dry_run:
- _RunCommand(['git', 'checkout', active_branch])
- _RunCommand(['git', 'branch', '-D', ROLL_BRANCH_NAME])
-
-
-def _LocalCommit(commit_msg, dry_run):
- logging.info('Committing changes locally.')
- if not dry_run:
- _RunCommand(['git', 'add', '--update', '.'])
- _RunCommand(['git', 'commit', '-m', commit_msg])
-
-
-def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos):
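-  """Return CQ mode: 0 = skip CQ, 1 = CQ dry run, 2 = submit to CQ."""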
- if skip_cq:
- return 0
- if (new_commit_pos - current_commit_pos) < cq_over:
- return 1
- return 2
-
-
-def _UploadCL(commit_queue_mode):
- """Upload the committed changes as a changelist to Gerrit.
-
- commit_queue_mode:
- - 2: Submit to commit queue.
- - 1: Run trybots but do not submit to CQ.
- - 0: Skip CQ, upload only.
- """
- cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks', '--send-mail']
- if commit_queue_mode >= 2:
- logging.info('Sending the CL to the CQ...')
- cmd.extend(['-o', 'label=Bot-Commit+1'])
- cmd.extend(['-o', 'label=Commit-Queue+2'])
- elif commit_queue_mode >= 1:
- logging.info('Starting CQ dry run...')
- cmd.extend(['-o', 'label=Commit-Queue+1'])
- extra_env = {
- 'EDITOR': 'true',
- 'SKIP_GCE_AUTH_FOR_GIT': '1',
- }
- stdout, stderr = _RunCommand(cmd, extra_env=extra_env)
- logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s',
- stdout, stderr)
-
-
-def main():
- p = argparse.ArgumentParser()
- p.add_argument('--clean', action='store_true', default=False,
- help='Removes any previous local roll branch.')
- p.add_argument('-r', '--revision',
- help=('Chromium Git revision to roll to. Defaults to the '
- 'Chromium HEAD revision if omitted.'))
- p.add_argument('--dry-run', action='store_true', default=False,
- help=('Calculate changes and modify DEPS, but don\'t create '
- 'any local branch, commit, upload CL or send any '
- 'tryjobs.'))
- p.add_argument('-i', '--ignore-unclean-workdir', action='store_true',
- default=False,
- help=('Ignore if the current branch is not main or if there '
- 'are uncommitted changes (default: %(default)s).'))
- grp = p.add_mutually_exclusive_group()
- grp.add_argument('--skip-cq', action='store_true', default=False,
- help='Skip sending the CL to the CQ (default: %(default)s)')
- grp.add_argument('--cq-over', type=int, default=1,
- help=('Commit queue dry run if the revision difference '
- 'is below this number (default: %(default)s)'))
- p.add_argument('-v', '--verbose', action='store_true', default=False,
- help='Be extra verbose in printing of log messages.')
- opts = p.parse_args()
-
- if opts.verbose:
- logging.basicConfig(level=logging.DEBUG)
- else:
- logging.basicConfig(level=logging.INFO)
-
- if not opts.ignore_unclean_workdir and not _IsTreeClean():
- logging.error('Please clean your local checkout first.')
- return 1
-
- if opts.clean:
- _RemovePreviousRollBranch(opts.dry_run)
-
- if not opts.ignore_unclean_workdir:
-    _EnsureUpdatedMainBranch(opts.dry_run)
-
- new_cr_rev = opts.revision
- if not new_cr_rev:
- stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD'])
- head_rev = stdout.strip().split('\t')[0]
- logging.info('No revision specified. Using HEAD: %s', head_rev)
- new_cr_rev = head_rev
-
- deps_filename = os.path.join(CHECKOUT_SRC_DIR, 'DEPS')
- libyuv_deps = ParseLocalDepsFile(deps_filename)
- current_cr_rev = libyuv_deps['vars']['chromium_revision']
-
- current_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(current_cr_rev))
- new_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(new_cr_rev))
-
- new_cr_deps = ParseRemoteCrDepsFile(new_cr_rev)
- changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps)
- clang_change = CalculateChangedClang(new_cr_rev)
- commit_msg = GenerateCommitMessage(current_cr_rev, new_cr_rev,
- current_commit_pos, new_commit_pos,
- changed_deps, clang_change)
- logging.debug('Commit message:\n%s', commit_msg)
-
- _CreateRollBranch(opts.dry_run)
- UpdateDepsFile(deps_filename, current_cr_rev, new_cr_rev, changed_deps)
- _LocalCommit(commit_msg, opts.dry_run)
- commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over,
- current_commit_pos, new_commit_pos)
- logging.info('Uploading CL...')
- if not opts.dry_run:
- _UploadCL(commit_queue_mode)
- return 0
-
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/files/include/libyuv.h b/include/libyuv.h
index a06e1233..a06e1233 100644
--- a/files/include/libyuv.h
+++ b/include/libyuv.h
diff --git a/files/include/libyuv/basic_types.h b/include/libyuv/basic_types.h
index 1bea67f2..1bea67f2 100644
--- a/files/include/libyuv/basic_types.h
+++ b/include/libyuv/basic_types.h
diff --git a/files/include/libyuv/compare.h b/include/libyuv/compare.h
index 3353ad71..3353ad71 100644
--- a/files/include/libyuv/compare.h
+++ b/include/libyuv/compare.h
diff --git a/files/include/libyuv/compare_row.h b/include/libyuv/compare_row.h
index d8e82d72..8293c919 100644
--- a/files/include/libyuv/compare_row.h
+++ b/include/libyuv/compare_row.h
@@ -28,7 +28,10 @@ extern "C" {
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
diff --git a/files/include/libyuv/convert.h b/include/libyuv/convert.h
index 46d37159..88619a4f 100644
--- a/files/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -151,6 +151,33 @@ int MM21ToI420(const uint8_t* src_y,
int width,
int height);
+// Convert MM21 to YUY2.
+LIBYUV_API
+int MM21ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height);
+
+// Convert MT2T to P010.
+// Note that src_y and src_uv point to packed 10-bit values, so each row
+// occupies 10/8 as many bytes as it has pixels. For this reason,
+// src_stride_y and src_stride_uv are given in bytes.
+LIBYUV_API
+int MT2TToP010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
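The exact MT2T tile layout is not spelled out in this header. Purely to illustrate how a packed 10-bit stream expands into P010's 16-bit samples, here is a hedged sketch assuming a generic 4-pixels-in-5-bytes packing; this is an assumption for illustration, not MT2T's actual tiling:

    #include <stdint.h>

    /* ASSUMED layout: 4 low bytes, then 1 byte holding four 2-bit high
     * parts. P010 keeps its 10 significant bits MSB-aligned in 16 bits. */
    static void Unpack10Bit_sketch(const uint8_t* src, uint16_t* dst,
                                   int pixels) {
      int i, j;
      for (i = 0; i < pixels; i += 4) {
        uint8_t hi = src[4];
        for (j = 0; j < 4; ++j) {
          uint16_t v = (uint16_t)((src[j] << 2) | ((hi >> (2 * j)) & 3));
          dst[i + j] = (uint16_t)(v << 6); /* MSB-align for P010 */
        }
        src += 5;
      }
    }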
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
@@ -272,6 +299,23 @@ int I210ToI422(const uint16_t* src_y,
int width,
int height);
+#define H410ToH420 I410ToI420
+LIBYUV_API
+int I410ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
@@ -323,6 +367,23 @@ int I212ToI422(const uint16_t* src_y,
int width,
int height);
+#define H212ToH420 I212ToI420
+LIBYUV_API
+int I212ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define H412ToH444 I412ToI444
LIBYUV_API
int I412ToI444(const uint16_t* src_y,
@@ -340,6 +401,23 @@ int I412ToI444(const uint16_t* src_y,
int width,
int height);
+#define H412ToH420 I412ToI420
+LIBYUV_API
+int I412ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010
@@ -560,6 +638,36 @@ int NV16ToNV24(const uint8_t* src_y,
int width,
int height);
+// Convert P010 to I010.
+LIBYUV_API
+int P010ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert P012 to I012.
+LIBYUV_API
+int P012ToI012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
@@ -677,6 +785,21 @@ int ARGBToI420(const uint8_t* src_argb,
int width,
int height);
+// Convert ARGB to I420 with alpha.
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height);
+
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra,
diff --git a/files/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index f66d20ce..35eeac9b 100644
--- a/files/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
@@ -67,6 +67,8 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
+#define I012ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
+ I012ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
@@ -404,6 +406,32 @@ int U444ToABGR(const uint8_t* src_y,
int width,
int height);
+// Convert I444 to RGB24.
+LIBYUV_API
+int I444ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert I444 to RAW.
+LIBYUV_API
+int I444ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
// Convert I010 to ARGB.
LIBYUV_API
int I010ToARGB(const uint16_t* src_y,
@@ -1312,6 +1340,32 @@ int J420ToRAW(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB24.
+LIBYUV_API
+int I422ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert I422 to RAW.
+LIBYUV_API
+int I422ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
LIBYUV_API
int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
@@ -1495,6 +1549,20 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
+// Convert I444 to RGB24 with matrix.
+LIBYUV_API
+int I444ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
@@ -1893,6 +1961,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB24 with matrix.
+LIBYUV_API
+int I422ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert I420 to RGB565 with specified color matrix.
LIBYUV_API
int I420ToRGB565Matrix(const uint8_t* src_y,
@@ -1907,6 +1989,20 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB565 with specified color matrix.
+LIBYUV_API
+int I422ToRGB565Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert I420 to AR30 with matrix.
LIBYUV_API
int I420ToAR30Matrix(const uint8_t* src_y,
@@ -1961,6 +2057,36 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
int height,
enum FilterMode filter);
+// Convert I422 to RGB24 with matrix and UV filter mode.
+LIBYUV_API
+int I422ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter);
+
+// Convert I420 to RGB24 with matrix and UV filter mode.
+LIBYUV_API
+int I420ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter);
+
// Convert I010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
diff --git a/files/include/libyuv/convert_from.h b/include/libyuv/convert_from.h
index 32f42a63..32f42a63 100644
--- a/files/include/libyuv/convert_from.h
+++ b/include/libyuv/convert_from.h
diff --git a/files/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h
index 2a488838..ff2a581a 100644
--- a/files/include/libyuv/convert_from_argb.h
+++ b/include/libyuv/convert_from_argb.h
@@ -209,10 +209,10 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height);
@@ -222,10 +222,10 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height);
@@ -238,6 +238,41 @@ int ARGBToJ400(const uint8_t* src_argb,
int width,
int height);
+// Convert ABGR to J420. (JPeg full range I420).
+LIBYUV_API
+int ABGRToJ420(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height);
+
+// Convert ABGR to J422.
+LIBYUV_API
+int ABGRToJ422(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height);
+
+// Convert ABGR to J400. (JPeg full range).
+LIBYUV_API
+int ABGRToJ400(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height);
+
// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,
diff --git a/files/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index fb90c6c7..5a81e7c9 100644
--- a/files/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -31,30 +31,36 @@ static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100; // unused at this time.
+static const int kCpuHasSSE42 = 0x100;
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasF16C = 0x2000;
-static const int kCpuHasGFNI = 0x4000;
-static const int kCpuHasAVX512BW = 0x8000;
-static const int kCpuHasAVX512VL = 0x10000;
-static const int kCpuHasAVX512VNNI = 0x20000;
-static const int kCpuHasAVX512VBMI = 0x40000;
-static const int kCpuHasAVX512VBMI2 = 0x80000;
-static const int kCpuHasAVX512VBITALG = 0x100000;
-static const int kCpuHasAVX512VPOPCNTDQ = 0x200000;
+static const int kCpuHasAVX512BW = 0x4000;
+static const int kCpuHasAVX512VL = 0x8000;
+static const int kCpuHasAVX512VNNI = 0x10000;
+static const int kCpuHasAVX512VBMI = 0x20000;
+static const int kCpuHasAVX512VBMI2 = 0x40000;
+static const int kCpuHasAVX512VBITALG = 0x80000;
+static const int kCpuHasAVX10 = 0x100000;
+static const int kCpuHasAVXVNNI = 0x200000;
+static const int kCpuHasAVXVNNIINT8 = 0x400000;
// These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x400000;
-static const int kCpuHasMSA = 0x800000;
+static const int kCpuHasMIPS = 0x800000;
+static const int kCpuHasMSA = 0x1000000;
// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000;
+// These flags are only valid on RISCV processors.
+static const int kCpuHasRISCV = 0x10000000;
+static const int kCpuHasRVV = 0x20000000;
+static const int kCpuHasRVVZVFH = 0x40000000;
+
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
LIBYUV_API
@@ -78,6 +84,8 @@ LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
+LIBYUV_API
+int RiscvCpuCaps(const char* cpuinfo_name);
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
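A minimal usage sketch for this API (the wrapper name is illustrative):

    #include "libyuv/cpu_id.h"

    /* Gate a SIMD fast path at runtime; TestCpuFlag auto-initializes. */
    static int CanUseAVX2(void) {
      return TestCpuFlag(kCpuHasAVX2);
    }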
diff --git a/files/include/libyuv/loongson_intrinsics.h b/include/libyuv/loongson_intrinsics.h
index 1d613def..1d613def 100644
--- a/files/include/libyuv/loongson_intrinsics.h
+++ b/include/libyuv/loongson_intrinsics.h
diff --git a/files/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h
index b9a44fcc..b9a44fcc 100644
--- a/files/include/libyuv/macros_msa.h
+++ b/include/libyuv/macros_msa.h
diff --git a/files/include/libyuv/mjpeg_decoder.h b/include/libyuv/mjpeg_decoder.h
index 275f8d4c..275f8d4c 100644
--- a/files/include/libyuv/mjpeg_decoder.h
+++ b/include/libyuv/mjpeg_decoder.h
diff --git a/files/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 1ef2256b..f9344721 100644
--- a/files/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -30,7 +30,10 @@ extern "C" {
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -85,13 +88,23 @@ void SetPlane(uint8_t* dst_y,
// Convert a plane of tiles of 16 x H to linear.
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height);
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
+
+// Convert a plane of 16-bit tiles of 16 x H to linear.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
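[Editor's note: DetilePlane (and the new 16-bit variant) now reports failure instead of returning void. A hedged usage sketch; the tile height of 32 matches the MM21 layout but is an assumption here, and the helper is hypothetical.]

    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    /* Hypothetical helper: linearize a tiled Y plane. Returns 0 on
       success, negative on invalid arguments (the new int return). */
    int DetileYPlane(const uint8_t* tiled_y, int tiled_stride,
                     uint8_t* linear_y, int width, int height) {
      return DetilePlane(tiled_y, tiled_stride, linear_y,
                         width /* dst stride */, width, height,
                         32 /* tile height, assumed MM21 */);
    }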
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
LIBYUV_API
@@ -105,6 +118,18 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
int height,
int tile_height);
+// Convert a Y and UV plane of tiles into interleaved YUY2.
+LIBYUV_API
+void DetileToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height,
+ int tile_height);
+
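[Editor's note: DetileToYUY2 fuses detiling with YUY2 packing, so a tiled NV12-style frame can skip the intermediate linear planes. A sketch under the same MM21 tile-height assumption:]

    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    /* Hypothetical helper: tiled Y + UV planes straight to packed YUY2. */
    void TiledNV12ToYUY2(const uint8_t* src_y, int src_stride_y,
                         const uint8_t* src_uv, int src_stride_uv,
                         uint8_t* dst_yuy2, int width, int height) {
      /* YUY2 stores 2 pixels in 4 bytes, so a row is width * 2 bytes. */
      DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv,
                   dst_yuy2, width * 2, width, height,
                   32 /* tile height, assumed MM21 */);
    }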
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
@@ -370,7 +395,26 @@ int I210Copy(const uint16_t* src_y,
int width,
int height);
+// Copy I410 to I410.
+#define I410ToI410 I410Copy
+LIBYUV_API
+int I410Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -383,6 +427,7 @@ int NV12Copy(const uint8_t* src_y,
int height);
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@@ -785,15 +830,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
int width,
int height);
-typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width);
-
-// Get function to Alpha Blend ARGB pixels and store to destination.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend();
-
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
diff --git a/files/include/libyuv/rotate.h b/include/libyuv/rotate.h
index 684ed5e6..37460c4a 100644
--- a/files/include/libyuv/rotate.h
+++ b/include/libyuv/rotate.h
@@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
+// Rotate I010 frame.
+LIBYUV_API
+int I010Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate I210 frame.
+LIBYUV_API
+int I210Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate I410 frame.
+LIBYUV_API
+int I410Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
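[Editor's note: the three new 16-bit rotations follow the 8-bit I420Rotate contract: for kRotate90 and kRotate270 the destination planes are allocated with swapped dimensions. A sketch for I010, with the helper name hypothetical and strides assumed to count uint16_t samples, per the pointer types:]

    #include <stdint.h>
    #include "libyuv/rotate.h"

    /* Hypothetical helper: rotate a 10-bit I010 frame 90 degrees.
       Destination planes must be sized height x width (swapped). */
    int RotateI010_90(const uint16_t* y, int y_stride,
                      const uint16_t* u, int u_stride,
                      const uint16_t* v, int v_stride,
                      uint16_t* dst_y, uint16_t* dst_u, uint16_t* dst_v,
                      int width, int height) {
      int dst_width = height; /* 90-degree rotation swaps dimensions */
      return I010Rotate(y, y_stride, u, u_stride, v, v_stride,
                        dst_y, dst_width,
                        dst_u, (dst_width + 1) / 2,
                        dst_v, (dst_width + 1) / 2,
                        width, height, kRotate90);
    }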
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
@@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src,
int width,
int height);
+// Rotate a plane by 0, 90, 180, or 270.
+LIBYUV_API
+int RotatePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode);
+
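[Editor's note: RotatePlane_16 is the plane-level primitive behind the 16-bit frame rotations above. A one-plane sketch with a hypothetical wrapper:]

    #include <stdint.h>
    #include "libyuv/rotate.h"

    /* Hypothetical helper: 180-degree rotation keeps the plane's
       dimensions, so source and destination share width/height. */
    int Rotate16Plane180(const uint16_t* src, int src_stride,
                         uint16_t* dst, int dst_stride,
                         int width, int height) {
      return RotatePlane_16(src, src_stride, dst, dst_stride,
                            width, height, kRotate180);
    }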
// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while
diff --git a/files/include/libyuv/rotate_argb.h b/include/libyuv/rotate_argb.h
index 20432949..20432949 100644
--- a/files/include/libyuv/rotate_argb.h
+++ b/include/libyuv/rotate_argb.h
diff --git a/files/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index aa8528a9..3e6a2fef 100644
--- a/files/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -28,7 +28,10 @@ extern "C" {
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -42,6 +45,8 @@ extern "C" {
// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSE4X4_32_SSE2
+#define HAS_TRANSPOSE4X4_32_AVX2
#endif
// The following are available for 64 bit GCC:
@@ -54,6 +59,7 @@ extern "C" {
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
+#define HAS_TRANSPOSE4X4_32_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@@ -215,6 +221,48 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
+void TransposeWxH_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+void TransposeWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx1_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width);
+
+// Transpose 32-bit values (ARGB).
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_AVX2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
#ifdef __cplusplus
} // extern "C"
diff --git a/files/include/libyuv/row.h b/include/libyuv/row.h
index 1a1cf4b6..46685a50 100644
--- a/files/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -11,7 +11,8 @@
#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_
-#include <stdlib.h> // For malloc.
+#include <stddef.h> // For NULL
+#include <stdlib.h> // For malloc
#include "libyuv/basic_types.h"
@@ -30,7 +31,10 @@ extern "C" {
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -75,9 +79,6 @@ extern "C" {
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#define HAS_ABGRTOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ABGRTOUVROW_SSSE3
-#endif
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBEXTRACTALPHAROW_SSE2
@@ -92,12 +93,6 @@ extern "C" {
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#endif
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3
@@ -111,6 +106,7 @@ extern "C" {
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
+#define HAS_I444TORGB24ROW_SSSE3
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_J400TOARGBROW_SSE2
#define HAS_J422TOARGBROW_SSSE3
@@ -124,16 +120,13 @@ extern "C" {
#define HAS_NV21TORGB24ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTORGB24ROW_SSSE3
+#define HAS_RAWTOYJROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
+#define HAS_RGB24TOYJROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_RGB24TOYJROW_SSSE3
-#define HAS_RAWTOYJROW_SSSE3
-#define HAS_RGBATOUVROW_SSSE3
-#endif
#define HAS_SETROW_ERMS
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
@@ -145,13 +138,18 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVROW_SSSE3
+#define HAS_ARGBTOUV444ROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
+#define HAS_ARGBTOUVROW_SSSE3
+#define HAS_BGRATOUVROW_SSSE3
+#define HAS_RGBATOUVROW_SSSE3
+#endif
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_SSSE3
-#endif
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
@@ -166,7 +164,6 @@ extern "C" {
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_BLENDPLANEROW_SSSE3
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
@@ -201,17 +198,10 @@ extern "C" {
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
-#define HAS_RAWTOYJROW_AVX2
-#define HAS_RGB24TOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBTOUVJROW_AVX2
-#define HAS_ARGBTOUVROW_AVX2
-#endif
#define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2
-// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
@@ -219,6 +209,7 @@ extern "C" {
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_I444TOARGBROW_AVX2
+#define HAS_I444TORGB24ROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
@@ -228,6 +219,8 @@ extern "C" {
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_NV21TORGB24ROW_AVX2
+#define HAS_RAWTOYJROW_AVX2
+#define HAS_RGB24TOYJROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOARGBROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
@@ -237,15 +230,16 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
+// #define HAS_HALFFLOATROW_F16C // Enable to test half float cast
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBTOUVJROW_AVX2
+#define HAS_ARGBTOUVROW_AVX2
+#endif
// Effects:
#define HAS_ARGBADDROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_AVX2
-#endif
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
@@ -282,28 +276,33 @@ extern "C" {
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#define HAS_AB64TOARGBROW_SSSE3
#define HAS_ABGRTOAR30ROW_SSSE3
+#define HAS_ABGRTOYJROW_SSSE3
+#define HAS_AR64TOARGBROW_SSSE3
+#define HAS_ARGBATTENUATEROW_SSSE3
+#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3
-#define HAS_ARGBTOAB64ROW_SSSE3
-#define HAS_AR64TOARGBROW_SSSE3
-#define HAS_AB64TOARGBROW_SSSE3
+#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
+#define HAS_DETILEROW_16_SSE2
#define HAS_DETILEROW_SSE2
#define HAS_DETILESPLITUVROW_SSSE3
+#define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
#define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3
#define HAS_I212TOAR30ROW_SSSE3
#define HAS_I212TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
-#define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3
#define HAS_I410TOARGBROW_SSSE3
+#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGEARGBROW_SSE2
-#define HAS_MERGEXRGBROW_SSE2
#define HAS_MERGERGBROW_SSSE3
+#define HAS_MERGEXRGBROW_SSE2
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV21TOYUV24ROW_SSSE3
#define HAS_P210TOAR30ROW_SSSE3
@@ -312,15 +311,17 @@ extern "C" {
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RGBATOYJROW_SSSE3
-#endif
#define HAS_SPLITARGBROW_SSE2
#define HAS_SPLITARGBROW_SSSE3
+#define HAS_SPLITRGBROW_SSSE3
#define HAS_SPLITXRGBROW_SSE2
#define HAS_SPLITXRGBROW_SSSE3
-#define HAS_SPLITRGBROW_SSSE3
#define HAS_SWAPUVROW_SSSE3
+#define HAS_YUY2TONVUVROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVJROW_SSSE3
+#endif
#if defined(__x86_64__) || !defined(__pic__)
// TODO(fbarchard): fix build error on android_full_debug=1
@@ -335,31 +336,23 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_AB64TOARGBROW_AVX2
#define HAS_ABGRTOAR30ROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ABGRTOUVROW_AVX2
+#define HAS_ABGRTOYJROW_AVX2
#define HAS_ABGRTOYROW_AVX2
-#endif
+#define HAS_AR64TOARGBROW_AVX2
+#define HAS_ARGBATTENUATEROW_AVX2
+#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
+#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
-#define HAS_ARGBTOAR64ROW_AVX2
-#define HAS_ARGBTOAB64ROW_AVX2
-#define HAS_AR64TOARGBROW_AVX2
-#define HAS_AB64TOARGBROW_AVX2
+#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
-#define HAS_INTERPOLATEROW_16TO8_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
+#define HAS_DETILEROW_16_AVX
#define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2
-#define HAS_MERGEAR64ROW_AVX2
-#define HAS_MERGEARGB16TO8ROW_AVX2
-#define HAS_MERGEARGBROW_AVX2
-#define HAS_MERGEXR30ROW_AVX2
-#define HAS_MERGEXR64ROW_AVX2
-#define HAS_MERGEXRGB16TO8ROW_AVX2
-#define HAS_MERGEXRGBROW_AVX2
-#define HAS_NV21TOYUV24ROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I212TOAR30ROW_AVX2
@@ -367,23 +360,35 @@ extern "C" {
#define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2
-#define HAS_P210TOAR30ROW_AVX2
-#define HAS_P210TOARGBROW_AVX2
-#define HAS_P410TOAR30ROW_AVX2
-#define HAS_P410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
+#define HAS_INTERPOLATEROW_16TO8_AVX2
+#define HAS_MERGEAR64ROW_AVX2
+#define HAS_MERGEARGB16TO8ROW_AVX2
+#define HAS_MERGEARGBROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
+#define HAS_MERGEXR30ROW_AVX2
+#define HAS_MERGEXR64ROW_AVX2
+#define HAS_MERGEXRGB16TO8ROW_AVX2
+#define HAS_MERGEXRGBROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_MULTIPLYROW_16_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_NV21TOYUV24ROW_AVX2
+#define HAS_P210TOAR30ROW_AVX2
+#define HAS_P210TOARGBROW_AVX2
+#define HAS_P410TOAR30ROW_AVX2
+#define HAS_P410TOARGBROW_AVX2
#define HAS_RGBATOYJROW_AVX2
-#endif
#define HAS_SPLITARGBROW_AVX2
-#define HAS_SPLITXRGBROW_AVX2
#define HAS_SPLITUVROW_16_AVX2
+#define HAS_SPLITXRGBROW_AVX2
#define HAS_SWAPUVROW_AVX2
+#define HAS_YUY2TONVUVROW_AVX2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVJROW_AVX2
+#define HAS_ABGRTOUVROW_AVX2
+#endif
#if defined(__x86_64__) || !defined(__pic__)
// TODO(fbarchard): fix build error on android_full_debug=1
@@ -397,8 +402,9 @@ extern "C" {
// TODO(fbarchard): Port to GCC and Visual C
// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789
#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512))
+ (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
#define HAS_ARGBTORGB24ROW_AVX512VBMI
+#define HAS_MERGEUVROW_AVX512BW
#endif
// The following are available for AVX512 clang x64 platforms:
@@ -412,7 +418,9 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_AB64TOARGBROW_NEON
+#define HAS_ABGRTOUVJROW_NEON
#define HAS_ABGRTOUVROW_NEON
+#define HAS_ABGRTOYJROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_AR64TOARGBROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
@@ -444,8 +452,11 @@ extern "C" {
#define HAS_BYTETOFLOATROW_NEON
#define HAS_CONVERT16TO8ROW_NEON
#define HAS_COPYROW_NEON
+#define HAS_DETILEROW_16_NEON
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
+#define HAS_DETILETOYUY2_NEON
+#define HAS_UNPACKMT2T_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@@ -461,6 +472,7 @@ extern "C" {
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444ALPHATOARGBROW_NEON
#define HAS_I444TOARGBROW_NEON
+#define HAS_I444TORGB24ROW_NEON
#define HAS_INTERPOLATEROW_16_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_J400TOARGBROW_NEON
@@ -513,6 +525,7 @@ extern "C" {
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YUY2TOARGBROW_NEON
+#define HAS_YUY2TONVUVROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
@@ -524,13 +537,13 @@ extern "C" {
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
-#define HAS_RGB24MIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_RGB24MIRRORROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
@@ -540,12 +553,13 @@ extern "C" {
// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_GAUSSCOL_F32_NEON
+#define HAS_GAUSSROW_F32_NEON
#define HAS_INTERPOLATEROW_16TO8_NEON
#define HAS_SCALESUMSAMPLES_NEON
-#define HAS_GAUSSROW_F32_NEON
-#define HAS_GAUSSCOL_F32_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_ABGRTOUVJROW_MSA
#define HAS_ABGRTOUVROW_MSA
#define HAS_ABGRTOYROW_MSA
#define HAS_ARGB1555TOARGBROW_MSA
@@ -581,27 +595,25 @@ extern "C" {
#define HAS_BGRATOYROW_MSA
#define HAS_HALFFLOATROW_MSA
#define HAS_I400TOARGBROW_MSA
-#define HAS_I422TOUYVYROW_MSA
-#define HAS_I422TOYUY2ROW_MSA
-#define HAS_I422TOARGBROW_MSA
-#define HAS_I422TORGBAROW_MSA
#define HAS_I422ALPHATOARGBROW_MSA
+#define HAS_I422TOARGB1555ROW_MSA
+#define HAS_I422TOARGB4444ROW_MSA
+#define HAS_I422TOARGBROW_MSA
#define HAS_I422TORGB24ROW_MSA
#define HAS_I422TORGB565ROW_MSA
-#define HAS_I422TOARGB4444ROW_MSA
-#define HAS_I422TOARGB1555ROW_MSA
-#define HAS_NV12TOARGBROW_MSA
-#define HAS_NV12TORGB565ROW_MSA
-#define HAS_NV21TOARGBROW_MSA
-#define HAS_YUY2TOARGBROW_MSA
-#define HAS_UYVYTOARGBROW_MSA
+#define HAS_I422TORGBAROW_MSA
+#define HAS_I422TOUYVYROW_MSA
+#define HAS_I422TOYUY2ROW_MSA
#define HAS_I444TOARGBROW_MSA
#define HAS_INTERPOLATEROW_MSA
#define HAS_J400TOARGBROW_MSA
#define HAS_MERGEUVROW_MSA
#define HAS_MIRRORROW_MSA
-#define HAS_MIRRORUVROW_MSA
#define HAS_MIRRORSPLITUVROW_MSA
+#define HAS_MIRRORUVROW_MSA
+#define HAS_NV12TOARGBROW_MSA
+#define HAS_NV12TORGB565ROW_MSA
+#define HAS_NV21TOARGBROW_MSA
#define HAS_RAWTOARGBROW_MSA
#define HAS_RAWTORGB24ROW_MSA
#define HAS_RAWTOUVROW_MSA
@@ -621,113 +633,226 @@ extern "C" {
#define HAS_SOBELXYROW_MSA
#define HAS_SOBELYROW_MSA
#define HAS_SPLITUVROW_MSA
+#define HAS_UYVYTOARGBROW_MSA
#define HAS_UYVYTOUVROW_MSA
#define HAS_UYVYTOYROW_MSA
+#define HAS_YUY2TOARGBROW_MSA
#define HAS_YUY2TOUV422ROW_MSA
#define HAS_YUY2TOUVROW_MSA
#define HAS_YUY2TOYROW_MSA
#endif
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
-#define HAS_ARGB4444TOARGBROW_LSX
+#define HAS_ABGRTOUVROW_LSX
+#define HAS_ABGRTOYROW_LSX
#define HAS_ARGB1555TOARGBROW_LSX
-#define HAS_RGB565TOARGBROW_LSX
-#define HAS_RGB24TOARGBROW_LSX
-#define HAS_RAWTOARGBROW_LSX
-#define HAS_ARGB1555TOYROW_LSX
#define HAS_ARGB1555TOUVROW_LSX
-#define HAS_RGB565TOYROW_LSX
-#define HAS_RGB565TOUVROW_LSX
-#define HAS_RGB24TOYROW_LSX
-#define HAS_RGB24TOUVROW_LSX
-#define HAS_RAWTOYROW_LSX
-#define HAS_RAWTOUVROW_LSX
+#define HAS_ARGB1555TOYROW_LSX
+#define HAS_ARGB4444TOARGBROW_LSX
+#define HAS_ARGBADDROW_LSX
+#define HAS_ARGBATTENUATEROW_LSX
+#define HAS_ARGBBLENDROW_LSX
+#define HAS_ARGBCOLORMATRIXROW_LSX
+#define HAS_ARGBEXTRACTALPHAROW_LSX
+#define HAS_ARGBGRAYROW_LSX
+#define HAS_ARGBSEPIAROW_LSX
+#define HAS_ARGBSHADEROW_LSX
+#define HAS_ARGBSHUFFLEROW_LSX
+#define HAS_ARGBSUBTRACTROW_LSX
+#define HAS_ARGBQUANTIZEROW_LSX
+#define HAS_ARGBSETROW_LSX
+#define HAS_ARGBTOARGB1555ROW_LSX
+#define HAS_ARGBTOARGB4444ROW_LSX
+#define HAS_ARGBTORAWROW_LSX
+#define HAS_ARGBTORGB24ROW_LSX
+#define HAS_ARGBTORGB565ROW_LSX
+#define HAS_ARGBTORGB565DITHERROW_LSX
+#define HAS_ARGBTOUVJROW_LSX
+#define HAS_ARGBTOUV444ROW_LSX
+#define HAS_ARGBTOUVROW_LSX
+#define HAS_ARGBTOYJROW_LSX
+#define HAS_ARGBMIRRORROW_LSX
+#define HAS_ARGBMULTIPLYROW_LSX
+#define HAS_BGRATOUVROW_LSX
+#define HAS_BGRATOYROW_LSX
+#define HAS_I400TOARGBROW_LSX
+#define HAS_I444TOARGBROW_LSX
+#define HAS_INTERPOLATEROW_LSX
+#define HAS_I422ALPHATOARGBROW_LSX
+#define HAS_I422TOARGB1555ROW_LSX
+#define HAS_I422TOARGB4444ROW_LSX
+#define HAS_I422TORGB24ROW_LSX
+#define HAS_I422TORGB565ROW_LSX
+#define HAS_I422TORGBAROW_LSX
+#define HAS_I422TOUYVYROW_LSX
+#define HAS_I422TOYUY2ROW_LSX
+#define HAS_J400TOARGBROW_LSX
+#define HAS_MERGEUVROW_LSX
+#define HAS_MIRRORROW_LSX
+#define HAS_MIRRORUVROW_LSX
+#define HAS_MIRRORSPLITUVROW_LSX
#define HAS_NV12TOARGBROW_LSX
#define HAS_NV12TORGB565ROW_LSX
#define HAS_NV21TOARGBROW_LSX
+#define HAS_RAWTOARGBROW_LSX
+#define HAS_RAWTORGB24ROW_LSX
+#define HAS_RAWTOUVROW_LSX
+#define HAS_RAWTOYROW_LSX
+#define HAS_RGB24TOARGBROW_LSX
+#define HAS_RGB24TOUVROW_LSX
+#define HAS_RGB24TOYROW_LSX
+#define HAS_RGB565TOARGBROW_LSX
+#define HAS_RGB565TOUVROW_LSX
+#define HAS_RGB565TOYROW_LSX
+#define HAS_RGBATOUVROW_LSX
+#define HAS_RGBATOYROW_LSX
+#define HAS_SETROW_LSX
#define HAS_SOBELROW_LSX
#define HAS_SOBELTOPLANEROW_LSX
#define HAS_SOBELXYROW_LSX
-#define HAS_ARGBTOYJROW_LSX
-#define HAS_BGRATOYROW_LSX
-#define HAS_BGRATOUVROW_LSX
-#define HAS_ABGRTOYROW_LSX
-#define HAS_ABGRTOUVROW_LSX
-#define HAS_RGBATOYROW_LSX
-#define HAS_RGBATOUVROW_LSX
-#define HAS_ARGBTOUVJROW_LSX
-#define HAS_I444TOARGBROW_LSX
-#define HAS_I400TOARGBROW_LSX
-#define HAS_J400TOARGBROW_LSX
-#define HAS_YUY2TOARGBROW_LSX
-#define HAS_UYVYTOARGBROW_LSX
-#define HAS_INTERPOLATEROW_LSX
-#define HAS_ARGBSETROW_LSX
-#define HAS_RAWTORGB24ROW_LSX
-#define HAS_MERGEUVROW_LSX
-#define HAS_ARGBEXTRACTALPHAROW_LSX
-#define HAS_ARGBBLENDROW_LSX
-#define HAS_ARGBQUANTIZEROW_LSX
-#define HAS_ARGBCOLORMATRIXROW_LSX
#define HAS_SPLITUVROW_LSX
-#define HAS_SETROW_LSX
-#define HAS_MIRRORSPLITUVROW_LSX
+#define HAS_UYVYTOARGBROW_LSX
+#define HAS_UYVYTOUV422ROW_LSX
+#define HAS_UYVYTOUVROW_LSX
+#define HAS_UYVYTOYROW_LSX
+#define HAS_YUY2TOARGBROW_LSX
+#define HAS_YUY2TOUVROW_LSX
+#define HAS_YUY2TOUV422ROW_LSX
+#define HAS_YUY2TOYROW_LSX
+#define HAS_ARGBTOYROW_LSX
+#define HAS_ABGRTOYJROW_LSX
+#define HAS_RGBATOYJROW_LSX
+#define HAS_RGB24TOYJROW_LSX
+#define HAS_RAWTOYJROW_LSX
+#endif
+
+#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
+#define HAS_I422TOARGBROW_LSX
#endif
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
+#define HAS_ARGB1555TOARGBROW_LASX
+#define HAS_ARGB1555TOUVROW_LASX
+#define HAS_ARGB1555TOYROW_LASX
+#define HAS_ARGB4444TOARGBROW_LASX
+#define HAS_ARGBADDROW_LASX
+#define HAS_ARGBATTENUATEROW_LASX
+#define HAS_ARGBGRAYROW_LASX
+#define HAS_ARGBMIRRORROW_LASX
+#define HAS_ARGBMULTIPLYROW_LASX
+#define HAS_ARGBSEPIAROW_LASX
+#define HAS_ARGBSHADEROW_LASX
+#define HAS_ARGBSHUFFLEROW_LASX
+#define HAS_ARGBSUBTRACTROW_LASX
+#define HAS_ARGBTOARGB1555ROW_LASX
+#define HAS_ARGBTOARGB4444ROW_LASX
+#define HAS_ARGBTORAWROW_LASX
+#define HAS_ARGBTORGB24ROW_LASX
+#define HAS_ARGBTORGB565DITHERROW_LASX
+#define HAS_ARGBTORGB565ROW_LASX
+#define HAS_ARGBTOUV444ROW_LASX
+#define HAS_ARGBTOUVJROW_LASX
+#define HAS_ARGBTOUVROW_LASX
+#define HAS_ARGBTOYJROW_LASX
+#define HAS_ARGBTOYROW_LASX
+#define HAS_ABGRTOYJROW_LASX
+#define HAS_ABGRTOYROW_LASX
+#define HAS_I422ALPHATOARGBROW_LASX
+#define HAS_I422TOARGB1555ROW_LASX
+#define HAS_I422TOARGB4444ROW_LASX
#define HAS_I422TOARGBROW_LASX
+#define HAS_I422TORGB24ROW_LASX
+#define HAS_I422TORGB565ROW_LASX
#define HAS_I422TORGBAROW_LASX
-#define HAS_I422ALPHATOARGBROW_LASX
-#define HAS_I422TOYUY2ROW_LASX
#define HAS_I422TOUYVYROW_LASX
+#define HAS_I422TOYUY2ROW_LASX
#define HAS_MIRRORROW_LASX
#define HAS_MIRRORUVROW_LASX
-#define HAS_ARGBMIRRORROW_LASX
-#define HAS_I422TORGB24ROW_LASX
-#define HAS_I422TORGB565ROW_LASX
-#define HAS_I422TOARGB4444ROW_LASX
-#define HAS_I422TOARGB1555ROW_LASX
-#define HAS_YUY2TOUVROW_LASX
-#define HAS_YUY2TOYROW_LASX
-#define HAS_YUY2TOUV422ROW_LASX
-#define HAS_UYVYTOYROW_LASX
-#define HAS_UYVYTOUVROW_LASX
-#define HAS_UYVYTOUV422ROW_LASX
-#define HAS_ARGBTOYROW_LASX
-#define HAS_ARGBTOUVROW_LASX
-#define HAS_ARGBTORGB24ROW_LASX
-#define HAS_ARGBTORAWROW_LASX
-#define HAS_ARGBTORGB565ROW_LASX
-#define HAS_ARGBTOARGB1555ROW_LASX
-#define HAS_ARGBTOARGB4444ROW_LASX
-#define HAS_ARGBTOUV444ROW_LASX
-#define HAS_ARGBMULTIPLYROW_LASX
-#define HAS_ARGBADDROW_LASX
-#define HAS_ARGBSUBTRACTROW_LASX
-#define HAS_ARGBATTENUATEROW_LASX
-#define HAS_ARGBTORGB565DITHERROW_LASX
-#define HAS_ARGBSHUFFLEROW_LASX
-#define HAS_ARGBSHADEROW_LASX
-#define HAS_ARGBGRAYROW_LASX
-#define HAS_ARGBSEPIAROW_LASX
-#define HAS_ARGB4444TOARGBROW_LASX
-#define HAS_ARGB1555TOARGBROW_LASX
-#define HAS_RGB565TOARGBROW_LASX
-#define HAS_RGB24TOARGBROW_LASX
-#define HAS_RAWTOARGBROW_LASX
-#define HAS_ARGB1555TOYROW_LASX
-#define HAS_ARGB1555TOUVROW_LASX
-#define HAS_RGB565TOYROW_LASX
-#define HAS_RGB565TOUVROW_LASX
-#define HAS_RGB24TOYROW_LASX
-#define HAS_RGB24TOUVROW_LASX
-#define HAS_RAWTOYROW_LASX
-#define HAS_RAWTOUVROW_LASX
#define HAS_NV12TOARGBROW_LASX
#define HAS_NV12TORGB565ROW_LASX
#define HAS_NV21TOARGBROW_LASX
-#define HAS_ARGBTOYJROW_LASX
-#define HAS_ARGBTOUVJROW_LASX
+#define HAS_RAWTOARGBROW_LASX
+#define HAS_RAWTOUVROW_LASX
+#define HAS_RAWTOYROW_LASX
+#define HAS_RGB24TOARGBROW_LASX
+#define HAS_RGB24TOUVROW_LASX
+#define HAS_RGB24TOYROW_LASX
+#define HAS_RGB565TOARGBROW_LASX
+#define HAS_RGB565TOUVROW_LASX
+#define HAS_RGB565TOYROW_LASX
+#define HAS_UYVYTOUV422ROW_LASX
+#define HAS_UYVYTOUVROW_LASX
+#define HAS_UYVYTOYROW_LASX
+#define HAS_YUY2TOUV422ROW_LASX
+#define HAS_YUY2TOUVROW_LASX
+#define HAS_YUY2TOYROW_LASX
+#define HAS_RGBATOYROW_LASX
+#define HAS_RGBATOYJROW_LASX
+#define HAS_BGRATOYROW_LASX
+#define HAS_RGB24TOYJROW_LASX
+#define HAS_RAWTOYJROW_LASX
+#endif
+
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_COPYROW_RVV
+#if __riscv_v_intrinsic == 11000
+#define HAS_AB64TOARGBROW_RVV
+#define HAS_ABGRTOYJROW_RVV
+#define HAS_ABGRTOYROW_RVV
+#define HAS_AR64TOARGBROW_RVV
+#define HAS_AR64TOAB64ROW_RVV
+#define HAS_ARGBATTENUATEROW_RVV
+#define HAS_ARGBBLENDROW_RVV
+#define HAS_ARGBCOPYYTOALPHAROW_RVV
+#define HAS_ARGBEXTRACTALPHAROW_RVV
+#define HAS_ARGBTOAB64ROW_RVV
+#define HAS_ARGBTOABGRROW_RVV
+#define HAS_ARGBTOAR64ROW_RVV
+#define HAS_ARGBTOBGRAROW_RVV
+#define HAS_ARGBTORAWROW_RVV
+#define HAS_ARGBTORGB24ROW_RVV
+#define HAS_ARGBTORGBAROW_RVV
+#define HAS_ARGBTOYJROW_RVV
+#define HAS_ARGBTOYMATRIXROW_RVV
+#define HAS_ARGBTOYROW_RVV
+#define HAS_BGRATOYROW_RVV
+#define HAS_BLENDPLANEROW_RVV
+#define HAS_I400TOARGBROW_RVV
+#define HAS_I422ALPHATOARGBROW_RVV
+#define HAS_I422TOARGBROW_RVV
+#define HAS_I422TORGB24ROW_RVV
+#define HAS_I422TORGBAROW_RVV
+#define HAS_I444ALPHATOARGBROW_RVV
+#define HAS_I444TOARGBROW_RVV
+#define HAS_I444TORGB24ROW_RVV
+#define HAS_INTERPOLATEROW_RVV
+#define HAS_J400TOARGBROW_RVV
+#define HAS_MERGEARGBROW_RVV
+#define HAS_MERGERGBROW_RVV
+#define HAS_MERGEUVROW_RVV
+#define HAS_MERGEXRGBROW_RVV
+#define HAS_NV12TOARGBROW_RVV
+#define HAS_NV12TORGB24ROW_RVV
+#define HAS_NV21TOARGBROW_RVV
+#define HAS_NV21TORGB24ROW_RVV
+#define HAS_RAWTOARGBROW_RVV
+#define HAS_RAWTORGB24ROW_RVV
+#define HAS_RAWTORGBAROW_RVV
+#define HAS_RAWTOYJROW_RVV
+#define HAS_RAWTOYROW_RVV
+#define HAS_RGB24TOARGBROW_RVV
+#define HAS_RGB24TOYJROW_RVV
+#define HAS_RGB24TOYROW_RVV
+#define HAS_RGBATOARGBROW_RVV
+#define HAS_RGBATOYJROW_RVV
+#define HAS_RGBATOYMATRIXROW_RVV
+#define HAS_RGBATOYROW_RVV
+#define HAS_RGBTOYMATRIXROW_RVV
+#define HAS_SPLITARGBROW_RVV
+#define HAS_SPLITRGBROW_RVV
+#define HAS_SPLITUVROW_RVV
+#define HAS_SPLITXRGBROW_RVV
+#endif
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@@ -789,8 +914,8 @@ typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif
-#if defined(__aarch64__) || defined(__arm__)
-// This struct is for ARM color conversion.
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
+// This struct is for ARM and RISC-V color conversion.
struct YuvConstants {
uvec8 kUVCoeff;
vec16 kRGBCoeffBias;
@@ -816,13 +941,13 @@ struct YuvConstants {
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
-#define align_buffer_64(var, size) \
- uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
- uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
+#define align_buffer_64(var, size) \
+ void* var##_mem = malloc((size) + 63); /* NOLINT */ \
+ uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
- var = 0
+ var = NULL
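[Editor's note: these macros live in the internal row.h, so the sketch below is illustrative only. align_buffer_64 declares both a raw allocation (var##_mem, now typed void*) and a 64-byte-aligned alias; free_aligned_buffer_64 now resets the alias to NULL rather than 0.]

    #include <stdlib.h>
    #include <string.h>

    /* Assumes the two macros above are in scope (internal row.h). */
    static void ZeroAlignedRow(int width) {
      align_buffer_64(row, width);     /* declares row_mem and row */
      if (row) {                       /* row is NULL if malloc failed */
        memset(row, 0, (size_t)width); /* row is 64-byte aligned */
      }
      free_aligned_buffer_64(row);     /* frees row_mem, sets row = NULL */
    }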
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
@@ -894,6 +1019,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -981,6 +1112,50 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I444ToARGBRow_MSA(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1000,6 +1175,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1012,6 +1193,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1025,6 +1212,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1038,6 +1232,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1050,6 +1250,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1062,6 +1268,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1074,6 +1286,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1142,15 +1360,39 @@ void UYVYToARGBRow_LSX(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void NV12ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV12ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void NV21ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
-void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@@ -1164,13 +1406,23 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
-void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
-void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width);
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
+void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width);
+void ABGRToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
+void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1189,11 +1441,20 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUVRow_LSX(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_LASX(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUV444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUV444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1203,6 +1464,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width);
void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -1258,6 +1524,11 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_MSA(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1372,6 +1643,13 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_y,
int width);
+void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
@@ -1384,6 +1662,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
uint8_t* dst_y,
int width);
@@ -1393,9 +1673,15 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
+void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
+void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
@@ -1409,6 +1695,7 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1423,6 +1710,7 @@ void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1453,10 +1741,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1465,7 +1758,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1485,6 +1785,11 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1495,6 +1800,11 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -1525,6 +1835,11 @@ void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1535,6 +1850,11 @@ void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1568,11 +1888,20 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUV444Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1582,6 +1911,11 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1747,16 +2081,16 @@ void ARGBToUVJRow_C(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_C(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
-void ARGBToUVJRow_C(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width);
void BGRAToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1772,6 +2106,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void RGBAToUVJRow_C(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void RGB24ToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1826,6 +2165,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1833,17 +2173,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width);
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorSplitUVRow_SSSE3(const uint8_t* src,
@@ -1867,10 +2210,13 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_v,
int width);
+void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width);
+
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
@@ -1883,6 +2229,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1925,6 +2272,10 @@ void SplitUVRow_LSX(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void SplitUVRow_RVV(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1949,7 +2300,6 @@ void DetileRow_C(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
-
void DetileRow_NEON(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
@@ -1966,6 +2316,42 @@ void DetileRow_Any_SSE2(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
+void DetileRow_AVX(const uint8_t* src,
+ ptrdiff_t src_tile_stride,
+ uint8_t* dst,
+ int width);
+void DetileRow_Any_AVX(const uint8_t* src,
+ ptrdiff_t src_tile_stride,
+ uint8_t* dst,
+ int width);
+void DetileRow_16_C(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
@@ -1991,6 +2377,38 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void DetileToYUY2_C(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_Any_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_Any_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@@ -2003,6 +2421,10 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width);
+void MergeUVRow_AVX512BW(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@@ -2015,6 +2437,10 @@ void MergeUVRow_LSX(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width);
+void MergeUVRow_RVV(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -2023,6 +2449,10 @@ void MergeUVRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void MergeUVRow_Any_AVX512BW(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void MergeUVRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -2079,6 +2509,11 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
+void SplitRGBRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2105,6 +2540,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_rgb,
int width);
+void MergeRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2139,6 +2579,12 @@ void MergeARGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_a,
uint8_t* dst_argb,
int width);
+void MergeARGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ int width);
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2187,6 +2633,12 @@ void SplitARGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
+void SplitARGBRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ uint8_t* dst_a,
+ int width);
void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2231,6 +2683,11 @@ void MergeXRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_argb,
int width);
+void MergeXRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_argb,
+ int width);
void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2271,6 +2728,11 @@ void SplitXRGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
+void SplitXRGBRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2604,8 +3066,8 @@ void Convert16To8Row_NEON(const uint16_t* src_y,
uint8_t* dst_y,
int scale,
int width);
-void Convert16To8Row_Any_NEON(const uint16_t* src_y,
- uint8_t* dst_y,
+void Convert16To8Row_Any_NEON(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
int scale,
int width);
@@ -2614,6 +3076,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int count);
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -2647,6 +3110,9 @@ void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb,
uint8_t* dst_a,
int width);
+void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -2666,6 +3132,7 @@ void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr,
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -2713,6 +3180,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
const uint8_t* shuffler,
int width);
+void ARGBShuffleRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
void ARGBShuffleRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
const uint8_t* shuffler,
@@ -2733,6 +3204,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint8_t* param,
int width);
+void ARGBShuffleRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint8_t* param,
@@ -2765,14 +3240,18 @@ void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width);
+void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width);
@@ -2932,15 +3411,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -2968,7 +3447,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
int width);
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -2981,23 +3460,44 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
int width);
void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
+ int width);
+void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
+void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
void ARGBToARGB1555Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
+void ARGBToARGB4444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
+void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
+
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -3011,6 +3511,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
const uint8_t* shuffler,
@@ -3035,6 +3537,12 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
+void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
+void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
+void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
@@ -3077,6 +3585,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
@@ -3096,6 +3605,12 @@ void I444ToARGBRow_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -3290,6 +3805,18 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToRGB24Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -3631,12 +4158,24 @@ void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -3823,13 +4362,13 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_yuv24,
+void NV21ToYUV24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_yuv24,
+void NV21ToYUV24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,
@@ -3976,6 +4515,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I400ToARGBRow_RVV(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* param,
@@ -4014,6 +4557,10 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBBlendRow_RVV(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
@@ -4040,6 +4587,11 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void BlendPlaneRow_RVV(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width);
void BlendPlaneRow_C(const uint8_t* src0,
const uint8_t* src1,
const uint8_t* alpha,
@@ -4084,10 +4636,18 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBMultiplyRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBMultiplyRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBMultiplyRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4130,10 +4690,18 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBAddRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBAddRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBAddRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBAddRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4177,10 +4745,18 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBSubtractRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBSubtractRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBSubtractRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4273,21 +4849,37 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
+void ARGBToRGB565DitherRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
-
+void ARGBToRGB24Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToRAWRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToARGB1555Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToARGB4444Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4298,6 +4890,12 @@ void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4443,6 +5041,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4455,6 +5059,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4468,6 +5078,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4481,6 +5098,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4493,6 +5116,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4505,6 +5134,12 @@ void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4517,6 +5152,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4592,6 +5233,10 @@ void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4602,6 +5247,10 @@ void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4612,17 +5261,27 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_LASX(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
@@ -4632,6 +5291,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4642,6 +5305,10 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4652,6 +5319,10 @@ void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4662,6 +5333,10 @@ void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4672,17 +5347,27 @@ void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@@ -4692,6 +5377,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4737,12 +5426,18 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
uint8_t* dst_v,
int width);
void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUVRow_LSX(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUVRow_LASX(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
@@ -4752,6 +5447,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUV422Row_LSX(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4798,12 +5497,18 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v,
int width);
void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@@ -4813,6 +5518,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUV422Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4927,6 +5636,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_yuy2,
int width);
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
void I422ToYUY2Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -4937,6 +5651,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_uyvy,
int width);
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
void I422ToUYVYRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -4947,6 +5666,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4957,6 +5681,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4977,9 +5706,15 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
+void ARGBAttenuateRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
+void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4992,6 +5727,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBAttenuateRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -5018,12 +5756,14 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width);
void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width);
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
@@ -5103,6 +5843,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
uint32_t value);
+void ARGBShadeRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
void ARGBShadeRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
@@ -5175,6 +5919,11 @@ void InterpolateRow_LSX(uint8_t* dst_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
+void InterpolateRow_RVV(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction);
void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
@@ -5482,7 +6231,19 @@ void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr,
float* dst_ptr,
float param,
int width);
-
+// Convert FP16 Half Floats to FP32 Floats
+void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16
+ float* dst,
+ int width);
+// Convert a column of FP16 Half Floats to a row of FP32 Floats
+void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16
+ int src_stride, // stride in elements
+ float* dst,
+ int width);
+// Convert FP32 Floats to FP16 Half Floats
+void ConvertFP32ToFP16Row_NEON(const float* src,
+ uint16_t* dst, // fp16
+ int width);
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
@@ -5526,6 +6287,17 @@ void GaussCol_F32_C(const float* src0,
float* dst,
int width);
+void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
+void GaussCol_C(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width);
+
+void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width);
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
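
The row.h declarations above follow libyuv's usual three-tier pattern: a portable _C reference row, SIMD rows (_NEON, _SSE2, _AVX2, _LSX, _LASX, _RVV) that require the width to be a multiple of the vector step, and _Any_ wrappers that handle leftover pixels. A minimal caller-side sketch of that dispatch, assuming the standard TestCpuFlag() and IS_ALIGNED() helpers from cpu_id.h and basic_types.h; the exact wiring here is illustrative, not the library's verbatim dispatch code:

    #include "libyuv/basic_types.h"  // IS_ALIGNED()
    #include "libyuv/cpu_id.h"       // TestCpuFlag(), kCpuHasNEON
    #include "libyuv/row.h"

    // Pick the fastest available ARGBAttenuateRow variant at runtime,
    // falling back to the portable C row when no SIMD path applies.
    static void AttenuateOneRow(const uint8_t* src_argb,
                                uint8_t* dst_argb,
                                int width) {
      void (*AttenuateRow)(const uint8_t*, uint8_t*, int) =
          ARGBAttenuateRow_C;
    #if defined(HAS_ARGBATTENUATEROW_NEON)
      if (TestCpuFlag(kCpuHasNEON)) {
        // The _Any_ wrapper tolerates any width; the plain NEON row
        // expects a multiple of 8 pixels.
        AttenuateRow = IS_ALIGNED(width, 8) ? ARGBAttenuateRow_NEON
                                            : ARGBAttenuateRow_Any_NEON;
      }
    #endif
      AttenuateRow(src_argb, dst_argb, width);
    }

A full-plane caller would invoke this per row, advancing src and dst by their strides; the new _RVV and _LSX rows added in this change slot into the same selection chain behind their HAS_ macros.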
diff --git a/files/include/libyuv/scale.h b/include/libyuv/scale.h
index 443f89c2..bfe4a344 100644
--- a/files/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -27,39 +27,40 @@ typedef enum FilterMode {
} FilterModeEnum;
// Scale a YUV plane.
+// Returns 0 if successful.
LIBYUV_API
-void ScalePlane(const uint8_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint8_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering);
+int ScalePlane(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint16_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering);
+int ScalePlane_16(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
// Sample is expected to be in the low 12 bits.
LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint16_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering);
+int ScalePlane_12(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
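
ScalePlane(), ScalePlane_16() and ScalePlane_12() now return int (0 on success) instead of void, so callers can detect failure. A short usage sketch under that contract, treating any nonzero return simply as failure since the specific error codes are not documented in this header:

    #include "libyuv/scale.h"

    // Downscale a Y plane to half size with box filtering, propagating
    // the new int result (0 on success) to the caller.
    int HalvePlane(const uint8_t* src, int src_stride,
                   int src_width, int src_height,
                   uint8_t* dst, int dst_stride) {
      return ScalePlane(src, src_stride, src_width, src_height,
                        dst, dst_stride, src_width / 2, src_height / 2,
                        kFilterBox);
    }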
diff --git a/files/include/libyuv/scale_argb.h b/include/libyuv/scale_argb.h
index 7641f18e..7641f18e 100644
--- a/files/include/libyuv/scale_argb.h
+++ b/include/libyuv/scale_argb.h
diff --git a/files/include/libyuv/scale_rgb.h b/include/libyuv/scale_rgb.h
index d17c39fd..d17c39fd 100644
--- a/files/include/libyuv/scale_rgb.h
+++ b/include/libyuv/scale_rgb.h
diff --git a/files/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 6cb5e128..02ed61ca 100644
--- a/files/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -29,7 +29,10 @@ extern "C" {
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
+#define LIBYUV_DISABLE_NEON
+#endif
+#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -133,6 +136,8 @@ extern "C" {
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEUVROWDOWN2_NEON
+#define HAS_SCALEUVROWDOWN2LINEAR_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2_LINEAR_NEON
@@ -173,6 +178,38 @@ extern "C" {
#define HAS_SCALEROWDOWN34_LSX
#endif
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_SCALEADDROW_RVV
+// TODO: Test ScaleARGBRowDownEven_RVV and enable it
+// #define HAS_SCALEARGBROWDOWNEVEN_RVV
+#define HAS_SCALEUVROWDOWN4_RVV
+#define HAS_SCALEUVROWDOWNEVEN_RVV
+#if __riscv_v_intrinsic == 11000
+#define HAS_SCALEARGBROWDOWN2_RVV
+#define HAS_SCALEARGBROWDOWN2BOX_RVV
+#define HAS_SCALEARGBROWDOWN2LINEAR_RVV
+#define HAS_SCALEARGBROWDOWNEVENBOX_RVV
+#define HAS_SCALEROWDOWN2_RVV
+#define HAS_SCALEROWDOWN2BOX_RVV
+#define HAS_SCALEROWDOWN2LINEAR_RVV
+#define HAS_SCALEROWDOWN34_0_BOX_RVV
+#define HAS_SCALEROWDOWN34_1_BOX_RVV
+#define HAS_SCALEROWDOWN34_RVV
+#define HAS_SCALEROWDOWN38_2_BOX_RVV
+#define HAS_SCALEROWDOWN38_3_BOX_RVV
+#define HAS_SCALEROWDOWN38_RVV
+#define HAS_SCALEROWDOWN4_RVV
+#define HAS_SCALEROWDOWN4BOX_RVV
+#define HAS_SCALEROWUP2_BILINEAR_RVV
+#define HAS_SCALEROWUP2_LINEAR_RVV
+#define HAS_SCALEUVROWDOWN2_RVV
+#define HAS_SCALEUVROWDOWN2BOX_RVV
+#define HAS_SCALEUVROWDOWN2LINEAR_RVV
+#define HAS_SCALEUVROWUP2_BILINEAR_RVV
+#define HAS_SCALEUVROWUP2_LINEAR_RVV
+#endif
+#endif
+
// Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width,
@@ -214,6 +251,17 @@ void ScalePlaneVertical_16To8(int src_height,
int scale,
enum FilterMode filtering);
+void ScalePlaneDown2_16To8(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ enum FilterMode filtering);
+
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@@ -259,6 +307,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -267,6 +325,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -279,6 +347,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -906,6 +984,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
+void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width);
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
@@ -1018,6 +1108,16 @@ void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb,
int src_stepx,
uint8_t* dst_argb,
int dst_width);
+void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
+void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
@@ -1100,6 +1200,18 @@ void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
+void ScaleUVRowDown2_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width);
+void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width);
+void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@@ -1160,6 +1272,16 @@ void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr,
int src_stepx,
uint8_t* dst_uv,
int dst_width);
+void ScaleUVRowDown4_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_uv,
+ int dst_width);
+void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int32_t src_stepx,
+ uint8_t* dst_uv,
+ int dst_width);
void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
@@ -1249,6 +1371,14 @@ void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
+void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width);
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
@@ -1701,6 +1831,61 @@ void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
+void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleRowDown2_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+
+void ScaleRowDown4_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width);
+void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width);
+
+void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width);
+void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
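
Each _RVV scale row declared above is expected to match its portable _C counterpart bit-for-bit. For orientation, the 2x2 box reduction that ScaleRowDown2Box_RVV vectorizes is, in scalar form (paraphrased from the _C reference, not copied verbatim):

    #include <stddef.h>
    #include <stdint.h>

    // Scalar form of the 2x2 box filter: each output pixel is the
    // rounded average of a 2x2 block spanning two adjacent source rows.
    void ScaleRowDown2Box(const uint8_t* src_ptr, ptrdiff_t src_stride,
                          uint8_t* dst, int dst_width) {
      const uint8_t* s = src_ptr;               // top source row
      const uint8_t* t = src_ptr + src_stride;  // bottom source row
      for (int x = 0; x < dst_width; ++x) {
        dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);
        s += 2;
        t += 2;
      }
    }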
diff --git a/files/include/libyuv/scale_uv.h b/include/libyuv/scale_uv.h
index 8e74e319..8e74e319 100644
--- a/files/include/libyuv/scale_uv.h
+++ b/include/libyuv/scale_uv.h
diff --git a/files/include/libyuv/version.h b/include/libyuv/version.h
index a85be048..a9c54400 100644
--- a/files/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1837
+#define LIBYUV_VERSION 1883
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/files/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 32b8a521..32b8a521 100644
--- a/files/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
diff --git a/infra/config/OWNERS b/infra/config/OWNERS
new file mode 100644
index 00000000..2c4f90a0
--- /dev/null
+++ b/infra/config/OWNERS
@@ -0,0 +1,3 @@
+fbarchard@chromium.org
+mbonadei@chromium.org
+jansson@google.com
diff --git a/files/infra/config/PRESUBMIT.py b/infra/config/PRESUBMIT.py
index 01ec0eed..f79e08ad 100644
--- a/files/infra/config/PRESUBMIT.py
+++ b/infra/config/PRESUBMIT.py
@@ -2,6 +2,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+USE_PYTHON3 = True
+
def CheckChangeOnUpload(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
diff --git a/files/infra/config/README.md b/infra/config/README.md
index e5e3b5f8..e5e3b5f8 100644
--- a/files/infra/config/README.md
+++ b/infra/config/README.md
diff --git a/files/infra/config/codereview.settings b/infra/config/codereview.settings
index 6d742273..6d742273 100644
--- a/files/infra/config/codereview.settings
+++ b/infra/config/codereview.settings
diff --git a/files/infra/config/commit-queue.cfg b/infra/config/commit-queue.cfg
index 4a8d77f4..4a8d77f4 100644
--- a/files/infra/config/commit-queue.cfg
+++ b/infra/config/commit-queue.cfg
diff --git a/files/infra/config/cr-buildbucket.cfg b/infra/config/cr-buildbucket.cfg
index 061cf33b..7415851b 100644
--- a/files/infra/config/cr-buildbucket.cfg
+++ b/infra/config/cr-buildbucket.cfg
@@ -29,10 +29,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -60,10 +59,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -91,10 +89,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -111,7 +108,7 @@ buckets {
name: "Android Tester ARM32 Debug (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -120,9 +117,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -139,7 +136,7 @@ buckets {
name: "Android Tester ARM32 Release (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -148,9 +145,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -167,7 +164,7 @@ buckets {
name: "Android Tester ARM64 Debug (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -176,9 +173,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -206,10 +203,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -237,10 +233,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -268,10 +263,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -299,10 +293,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -330,10 +323,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -361,10 +353,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -392,10 +383,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -423,10 +413,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -454,10 +443,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -485,10 +473,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -516,10 +503,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -537,7 +523,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -546,9 +532,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -566,7 +552,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -575,9 +561,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -595,7 +581,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -604,9 +590,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -634,10 +620,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -665,10 +650,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -696,10 +680,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -727,10 +710,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -758,10 +740,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -789,10 +770,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -820,10 +800,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -851,10 +830,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -872,7 +850,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -881,9 +859,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -901,7 +879,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -910,9 +888,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -985,7 +963,7 @@ buckets {
name: "android"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -994,9 +972,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1013,7 +991,7 @@ buckets {
name: "android_arm64"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1022,9 +1000,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1041,7 +1019,7 @@ buckets {
name: "android_rel"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1050,9 +1028,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1080,10 +1058,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1111,10 +1088,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1132,7 +1108,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1141,9 +1117,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1161,7 +1137,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1170,9 +1146,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1200,10 +1176,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1231,10 +1206,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1262,10 +1236,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1293,10 +1266,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1324,10 +1296,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1355,10 +1326,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1386,10 +1356,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1417,10 +1386,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1438,7 +1406,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1447,9 +1415,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1467,7 +1435,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1476,9 +1444,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1496,7 +1464,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1505,9 +1473,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1535,10 +1503,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": true,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "run_presubmit",'
@@ -1568,10 +1535,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1599,10 +1565,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1630,10 +1595,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1661,10 +1625,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1692,10 +1655,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
@@ -1723,10 +1685,9 @@ buckets {
}
properties:
'{'
- ' "$build/goma": {'
- ' "enable_ats": false,'
- ' "server_host": "goma.chromium.org",'
- ' "use_luci_auth": true'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
diff --git a/files/infra/config/luci-logdog.cfg b/infra/config/luci-logdog.cfg
index adc75bef..adc75bef 100644
--- a/files/infra/config/luci-logdog.cfg
+++ b/infra/config/luci-logdog.cfg
diff --git a/files/infra/config/luci-milo.cfg b/infra/config/luci-milo.cfg
index baf786f2..baf786f2 100644
--- a/files/infra/config/luci-milo.cfg
+++ b/infra/config/luci-milo.cfg
diff --git a/files/infra/config/luci-scheduler.cfg b/infra/config/luci-scheduler.cfg
index 0ec5dd0e..0ec5dd0e 100644
--- a/files/infra/config/luci-scheduler.cfg
+++ b/infra/config/luci-scheduler.cfg
diff --git a/files/infra/config/main.star b/infra/config/main.star
index b922ca02..e83afe4f 100755
--- a/files/infra/config/main.star
+++ b/infra/config/main.star
@@ -8,22 +8,14 @@ lucicfg.check_version("1.30.9")
LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv"
LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv"
-GOMA_BACKEND_RBE_PROD = {
- "server_host": "goma.chromium.org",
- "use_luci_auth": True,
+RECLIENT_CI = {
+ "instance": "rbe-webrtc-trusted",
+ "metrics_project": "chromium-reclient-metrics",
}
-GOMA_BACKEND_RBE_ATS_PROD = {
- "server_host": "goma.chromium.org",
- "use_luci_auth": True,
- "enable_ats": True,
-}
-
-# Disable ATS on Windows CQ/try.
-GOMA_BACKEND_RBE_NO_ATS_PROD = {
- "server_host": "goma.chromium.org",
- "use_luci_auth": True,
- "enable_ats": False,
+RECLIENT_CQ = {
+ "instance": "rbe-webrtc-untrusted",
+ "metrics_project": "chromium-reclient-metrics",
}
# Use LUCI Scheduler BBv2 names and add Scheduler realms configs.
@@ -70,6 +62,10 @@ luci.project(
],
bindings = [
luci.binding(
+ roles = "role/swarming.taskTriggerer", # for LED tasks.
+ groups = "project-libyuv-admins",
+ ),
+ luci.binding(
roles = "role/configs.validator",
users = "libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com",
),
@@ -195,28 +191,15 @@ luci.bucket(
def get_os_dimensions(os):
if os == "android":
- return {"device_type": "bullhead"}
+ return {"device_type": "walleye"}
if os == "ios" or os == "mac":
- return {"os": "Mac-10.15", "cpu": "x86-64"}
+ return {"os": "Mac-12", "cpu": "x86-64"}
elif os == "win":
return {"os": "Windows-10", "cores": "8", "cpu": "x86-64"}
elif os == "linux":
return {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"}
return {}
-def get_os_properties(os, try_builder = False):
- if os == "android":
- return {"$build/goma": GOMA_BACKEND_RBE_PROD}
- elif os in ("ios", "mac"):
- return {"$build/goma": GOMA_BACKEND_RBE_PROD}
- elif os == "win" and try_builder:
- return {"$build/goma": GOMA_BACKEND_RBE_NO_ATS_PROD}
- elif os == "win":
- return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
- elif os == "linux":
- return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
- return {}
-
def libyuv_ci_builder(name, dimensions, properties, triggered_by):
return luci.builder(
name = name,
@@ -254,7 +237,7 @@ def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyu
def ci_builder(name, os, category, short_name = None):
dimensions = get_os_dimensions(os)
- properties = get_os_properties(os)
+ properties = {"$build/reclient": RECLIENT_CI}
dimensions["pool"] = "luci.flex.ci"
properties["builder_group"] = "client.libyuv"
@@ -265,7 +248,7 @@ def ci_builder(name, os, category, short_name = None):
def try_builder(name, os, experiment_percentage = None):
dimensions = get_os_dimensions(os)
- properties = get_os_properties(os, try_builder = True)
+ properties = {"$build/reclient": RECLIENT_CQ}
dimensions["pool"] = "luci.flex.try"
properties["builder_group"] = "tryserver.libyuv"
diff --git a/files/infra/config/project.cfg b/infra/config/project.cfg
index 700226ad..3c327118 100644
--- a/files/infra/config/project.cfg
+++ b/infra/config/project.cfg
@@ -7,7 +7,7 @@
name: "libyuv"
access: "group:all"
lucicfg {
- version: "1.30.9"
+ version: "1.39.14"
package_dir: "."
config_dir: "."
entry_point: "main.star"
diff --git a/files/infra/config/realms.cfg b/infra/config/realms.cfg
index ae04529e..16ffaac9 100644
--- a/files/infra/config/realms.cfg
+++ b/infra/config/realms.cfg
@@ -38,6 +38,10 @@ realms {
role: "role/scheduler.reader"
principals: "group:all"
}
+ bindings {
+ role: "role/swarming.taskTriggerer"
+ principals: "group:project-libyuv-admins"
+ }
}
realms {
name: "ci"
diff --git a/files/libyuv.gni b/libyuv.gni
index 8df40ba2..343160c3 100644
--- a/files/libyuv.gni
+++ b/libyuv.gni
@@ -6,13 +6,15 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
-import("//build_overrides/build.gni")
import("//build/config/arm.gni")
+import("//build/config/loongarch64.gni")
import("//build/config/mips.gni")
+import("//build_overrides/build.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false
+ libyuv_disable_rvv = false
libyuv_use_neon =
current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
@@ -20,4 +22,8 @@ declare_args() {
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
libyuv_use_mmi =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
+ libyuv_use_lsx =
+ (current_cpu == "loong64") && loongarch64_use_lsx
+ libyuv_use_lasx =
+ (current_cpu == "loong64") && loongarch64_use_lasx
}
diff --git a/files/libyuv.gyp b/libyuv.gyp
index f73a1a4b..f73a1a4b 100644
--- a/files/libyuv.gyp
+++ b/libyuv.gyp
diff --git a/files/libyuv.gypi b/libyuv.gypi
index 48936aa7..48936aa7 100644
--- a/files/libyuv.gypi
+++ b/libyuv.gypi
diff --git a/files/linux.mk b/linux.mk
index b541b47c..d19a888a 100644
--- a/files/linux.mk
+++ b/linux.mk
@@ -33,6 +33,7 @@ LOCAL_OBJ_FILES := \
source/rotate_argb.o \
source/rotate_common.o \
source/rotate_gcc.o \
+ source/rotate_lsx.o \
source/rotate_msa.o \
source/rotate_neon.o \
source/rotate_neon64.o \
@@ -40,19 +41,24 @@ LOCAL_OBJ_FILES := \
source/row_any.o \
source/row_common.o \
source/row_gcc.o \
+ source/row_lasx.o \
+ source/row_lsx.o \
source/row_msa.o \
source/row_neon.o \
source/row_neon64.o \
+ source/row_rvv.o \
source/row_win.o \
source/scale.o \
source/scale_any.o \
source/scale_argb.o \
source/scale_common.o \
source/scale_gcc.o \
+ source/scale_lsx.o \
source/scale_msa.o \
source/scale_neon.o \
source/scale_neon64.o \
source/scale_rgb.o \
+ source/scale_rvv.o \
source/scale_uv.o \
source/scale_win.o \
source/video_common.o
diff --git a/public.mk b/public.mk
index 259ece21..1342307a 100644
--- a/public.mk
+++ b/public.mk
@@ -3,7 +3,7 @@
# Note that dependencies on NDK are not directly listed since NDK auto adds
# them.
-LIBYUV_INCLUDES := $(LIBYUV_PATH)/files/include
+LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
LIBYUV_C_FLAGS :=
diff --git a/files/pylintrc b/pylintrc
index b8bea334..b8bea334 100644
--- a/files/pylintrc
+++ b/pylintrc
diff --git a/riscv_script/prepare_toolchain_qemu.sh b/riscv_script/prepare_toolchain_qemu.sh
new file mode 100755
index 00000000..2a901739
--- /dev/null
+++ b/riscv_script/prepare_toolchain_qemu.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+set -ev
+
+# Download & build RISC-V Clang toolchain & QEMU emulator.
+# RISC-V Clang is for cross compile with the RISC-V Vector ISA.
+# RISC-V QEMU is used to run the test suite.
+#
+# Requirements: Linux host w/ working C++ compiler, git, cmake, ninja, wget, tar
+
+# NOTE: this script must be run from the top-level libyuv source directory.
+
+RISCV_TRIPLE="riscv64-unknown-linux-gnu"
+RISCV_QEMU="qemu-riscv64"
+
+LIBYUV_SRC_DIR=$(pwd)
+BUILD_DIR="$LIBYUV_SRC_DIR"/build-toolchain-qemu
+INSTALL_QEMU="$BUILD_DIR"/riscv-qemu
+INSTALL_CLANG="$BUILD_DIR"/riscv-clang
+
+LLVM_VERSION="16.0.0"
+LLVM_NAME=llvm-project-"$LLVM_VERSION".src
+
+RISCV_GNU_TOOLCHAIN="$BUILD_DIR"/riscv-gnu-toolchain
+RISCV_CLANG_TOOLCHAIN="$BUILD_DIR"/"$LLVM_NAME"
+
+QEMU_NAME="qemu-7.0.0"
+
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+# Download and install RISC-V GNU Toolchain (needed to build Clang)
+if [ ! -d "$RISCV_GNU_TOOLCHAIN" ]
+then
+ git clone git@github.com:riscv/riscv-gnu-toolchain.git
+ pushd "$RISCV_GNU_TOOLCHAIN"
+ git submodule update --init --recursive
+ ./configure --with-cmodel=medany --prefix="$INSTALL_CLANG"
+ ionice nice make linux -j `nproc` install
+ popd
+fi
+
+# Download Clang toolchain & build cross compiler
+if [ ! -d "$RISCV_CLANG_TOOLCHAIN" ]
+then
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-"$LLVM_VERSION"/"$LLVM_NAME".tar.xz
+ tar xvJf "$LLVM_NAME".tar.xz
+ pushd "$RISCV_CLANG_TOOLCHAIN"
+ cmake -DCMAKE_INSTALL_PREFIX="$INSTALL_CLANG" \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_TARGETS_TO_BUILD="RISCV" \
+ -DLLVM_ENABLE_PROJECTS="clang" \
+ -DLLVM_DEFAULT_TARGET_TRIPLE="$RISCV_TRIPLE" \
+ -DLLVM_INSTALL_TOOLCHAIN_ONLY=On \
+ -DDEFAULT_SYSROOT=../sysroot \
+ -G "Ninja" "$RISCV_CLANG_TOOLCHAIN"/llvm
+ ionice nice ninja -j `nproc`
+ ionice nice ninja -j `nproc` install
+ popd
+ pushd "$INSTALL_CLANG"/bin
+ ln -sf clang "$RISCV_TRIPLE"-clang
+ ln -sf clang++ "$RISCV_TRIPLE"-clang++
+ popd
+fi
+
+# Download QEMU and build the riscv64 Linux usermode emulator
+if [ ! -d "$QEMU_NAME" ]
+then
+ wget https://download.qemu.org/"$QEMU_NAME".tar.xz
+ tar xvJf "$QEMU_NAME".tar.xz
+ pushd "$QEMU_NAME"
+ ./configure --target-list=riscv64-linux-user --prefix="$INSTALL_QEMU"
+ ionice nice make -j `nproc` install
+ popd
+fi
diff --git a/riscv_script/riscv-clang.cmake b/riscv_script/riscv-clang.cmake
new file mode 100644
index 00000000..e287941f
--- /dev/null
+++ b/riscv_script/riscv-clang.cmake
@@ -0,0 +1,55 @@
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_SYSTEM_NAME "Linux")
+set(CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+option(USE_RVV "Enable the RISC-V Vector extension." ON)
+option(USE_AUTO_VECTORIZER "Enable the RISC-V auto-vectorizer." OFF)
+
+# Avoid using the host system paths when cross-compiling.
+set(CMAKE_FIND_USE_CMAKE_SYSTEM_PATH FALSE)
+
+set(TOOLCHAIN_PATH "" CACHE STRING "The toolchain path.")
+if(NOT TOOLCHAIN_PATH)
+ set(TOOLCHAIN_PATH ${CMAKE_SOURCE_DIR}/build-toolchain-qemu/riscv-clang)
+endif()
+
+set(TOOLCHAIN_PREFIX "riscv64-unknown-linux-gnu-" CACHE STRING "The toolchain prefix.")
+
+# toolchain setting
+set(CMAKE_C_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang")
+set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang++")
+
+# CMake would otherwise pick the host-side versions of the following tools, so set them up here.
+set(CMAKE_C_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar")
+set(CMAKE_CXX_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar")
+set(CMAKE_C_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib")
+set(CMAKE_CXX_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib")
+set(CMAKE_OBJDUMP "${TOOLCHAIN_PATH}/bin/llvm-objdump")
+set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy")
+
+# compile options
+set(RISCV_COMPILER_FLAGS "" CACHE STRING "Compile flags")
+# If the user provides RISCV_COMPILER_FLAGS, appending the default compile flags is skipped.
+if(RISCV_COMPILER_FLAGS STREQUAL "")
+ message(STATUS "USE_RVV: ${USE_RVV}")
+ message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
+ if(USE_RVV)
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
+ if(NOT USE_AUTO_VECTORIZER)
+ # Disable auto-vectorizer
+ add_compile_options(-fno-vectorize -fno-slp-vectorize)
+ endif()
+ else()
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
+ endif()
+endif()
+message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}")
+
+set(CMAKE_C_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}")
+
+set(RISCV_LINKER_FLAGS "-lstdc++ -lpthread -lm -ldl")
+set(RISCV_LINKER_FLAGS_EXE)
+set(CMAKE_SHARED_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
+set(CMAKE_MODULE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${RISCV_LINKER_FLAGS_EXE} ${CMAKE_EXE_LINKER_FLAGS}")
diff --git a/riscv_script/run_qemu.sh b/riscv_script/run_qemu.sh
new file mode 100755
index 00000000..080af3b1
--- /dev/null
+++ b/riscv_script/run_qemu.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -x
+set -e
+
+USE_RVV="${USE_RVV:-OFF}"
+TOOLCHAIN_PATH="${TOOLCHAIN_PATH:-../../build-toolchain-qemu/riscv-clang}"
+QEMU_PREFIX_PATH="${QEMU_PREFIX_PATH:-../../build-toolchain-qemu/riscv-qemu/}"
+
+if [ "${USE_RVV}" = "ON" ];then
+ QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0 -L ${TOOLCHAIN_PATH}/sysroot"
+else
+ QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true -L ${TOOLCHAIN_PATH}/sysroot"
+fi
+
+"$QEMU_PREFIX_PATH"/bin/qemu-riscv64 $QEMU_OPTION "$@"
diff --git a/files/source/compare.cc b/source/compare.cc
index d4713b60..50a736bd 100644
--- a/files/source/compare.cc
+++ b/source/compare.cc
@@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
}
#endif
- while (count >= (uint64_t)(kBlockSize)) {
+ while (count >= (uint64_t)kBlockSize) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
@@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
- if (ssim_d == 0.0) {
+ if (ssim_d == 0) {
return DBL_MAX;
}
- return ssim_n * 1.0 / ssim_d;
+ return (double)ssim_n / (double)ssim_d;
}
}
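The rewritten return is behavior-preserving: ssim_n * 1.0 / ssim_d already promoted to double, and the explicit casts just make that visible. A minimal sketch of the integer-division pitfall the casts rule out, assuming int64_t operands as in Ssim8x8_C (illustrative code, not part of the patch):

  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    int64_t ssim_n = 7, ssim_d = 2;
    double truncated = (double)(ssim_n / ssim_d);    // 3.0: divided as int64 first
    double exact = (double)ssim_n / (double)ssim_d;  // 3.5: divided as doubles
    printf("%.1f %.1f\n", truncated, exact);
    return 0;
  }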
diff --git a/files/source/compare_common.cc b/source/compare_common.cc
index d1cab8d2..d1cab8d2 100644
--- a/files/source/compare_common.cc
+++ b/source/compare_common.cc
diff --git a/files/source/compare_gcc.cc b/source/compare_gcc.cc
index b834b42a..33cbe25d 100644
--- a/files/source/compare_gcc.cc
+++ b/source/compare_gcc.cc
@@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
- return static_cast<uint32_t>(diff);
+ return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
diff --git a/files/source/compare_msa.cc b/source/compare_msa.cc
index 0b807d37..0b807d37 100644
--- a/files/source/compare_msa.cc
+++ b/source/compare_msa.cc
diff --git a/files/source/compare_neon.cc b/source/compare_neon.cc
index afdd6012..afdd6012 100644
--- a/files/source/compare_neon.cc
+++ b/source/compare_neon.cc
diff --git a/files/source/compare_neon64.cc b/source/compare_neon64.cc
index 70fb9b91..70fb9b91 100644
--- a/files/source/compare_neon64.cc
+++ b/source/compare_neon64.cc
diff --git a/files/source/compare_win.cc b/source/compare_win.cc
index 9bb27f1d..9bb27f1d 100644
--- a/files/source/compare_win.cc
+++ b/source/compare_win.cc
diff --git a/files/source/convert.cc b/source/convert.cc
index 7178580f..6ac5bc43 100644
--- a/files/source/convert.cc
+++ b/source/convert.cc
@@ -24,6 +24,10 @@ namespace libyuv {
extern "C" {
#endif
+// Subsample amount uses a shift.
+// v is value
+// a is amount to add to round up
+// s is shift to subsample down
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
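A minimal standalone sketch of what the newly documented macro computes; the asserts are illustrative, not library code. Note the macro body has no outer parentheses, so it is safest evaluated on its own (for example, assigned to a variable) rather than embedded in a larger expression:

  #include <assert.h>

  #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)

  int main(void) {
    int even = SUBSAMPLE(4, 1, 1);  /* 4 -> 2: exact half */
    int odd = SUBSAMPLE(5, 1, 1);   /* 5 -> 3: odd dimensions round up */
    int neg = SUBSAMPLE(-5, 1, 1);  /* -5 -> -3: rounds away from zero */
    assert(even == 2 && odd == 3 && neg == -3);
    return 0;
  }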
@@ -50,18 +54,25 @@ static int I4xxToI420(const uint8_t* src_y,
const int dst_y_height = Abs(src_y_height);
const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+ int r;
if (src_uv_width <= 0 || src_uv_height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
- dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+ dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
- dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
- dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
- return 0;
+ r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return r;
}
// Copy I420 with optional flipping.
@@ -199,6 +210,99 @@ static int Planar16bitTo8bit(const uint16_t* src_y,
return 0;
}
+static int I41xToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth) {
+ const int scale = 1 << (24 - depth);
+
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ {
+ const int uv_width = SUBSAMPLE(width, 1, 1);
+ const int uv_height = SUBSAMPLE(height, 1, 1);
+
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
+ height);
+ ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
+ dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
+ ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
+ dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
+ }
+ return 0;
+}
+
+static int I21xToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth) {
+ const int scale = 1 << (24 - depth);
+
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ {
+ const int uv_width = SUBSAMPLE(width, 1, 1);
+ const int uv_height = SUBSAMPLE(height, 1, 1);
+ const int dy = FixedDiv(height, uv_height);
+
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
+ height);
+ ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u,
+ dst_stride_u, src_u, dst_u, 0, 32768, dy,
+ /*bpp=*/1, scale, kFilterBilinear);
+ ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v,
+ dst_stride_v, src_v, dst_v, 0, 32768, dy,
+ /*bpp=*/1, scale, kFilterBilinear);
+ }
+ return 0;
+}
+
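Worked numbers for the scale factor both helpers above compute, assuming the usual (v * scale) >> 16 fixed-point semantics of Convert16To8Plane: for depth 10, scale = 1 << (24 - 10) = 16384, so the maximum sample 1023 maps to (1023 * 16384) >> 16 = 255; for depth 12, scale = 4096 and 4095 likewise maps to 255.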
// Convert 10 bit YUV to 8 bit.
LIBYUV_API
int I010ToI420(const uint16_t* src_y,
@@ -236,38 +340,9 @@ int I210ToI420(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
- const int depth = 10;
- const int scale = 1 << (24 - depth);
-
- if (width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- {
- const int uv_width = SUBSAMPLE(width, 1, 1);
- const int uv_height = SUBSAMPLE(height, 1, 1);
- const int dy = FixedDiv(height, uv_height);
-
- Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
- height);
- ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u,
- dst_stride_u, src_u, dst_u, 0, 32768, dy,
- /*bpp=*/1, scale, kFilterBilinear);
- ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v,
- dst_stride_v, src_v, dst_v, 0, 32768, dy,
- /*bpp=*/1, scale, kFilterBilinear);
- }
- return 0;
+ return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 10);
}
LIBYUV_API
@@ -292,6 +367,26 @@ int I210ToI422(const uint16_t* src_y,
}
LIBYUV_API
+int I410ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 10);
+}
+
+LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -355,6 +450,26 @@ int I212ToI422(const uint16_t* src_y,
}
LIBYUV_API
+int I212ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 12);
+}
+
+LIBYUV_API
int I412ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -375,6 +490,26 @@ int I412ToI444(const uint16_t* src_y,
0, 12);
}
+LIBYUV_API
+int I412ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 12);
+}
+
// Any Ix10 To I010 format with mirroring.
static int Ix10ToI010(const uint16_t* src_y,
int src_stride_y,
@@ -398,18 +533,25 @@ static int Ix10ToI010(const uint16_t* src_y,
const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+ int r;
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- dst_y_width, dst_y_height, kFilterBilinear);
+ r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ dst_y_width, dst_y_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
- dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
- ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
- dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
- return 0;
+ r = ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return r;
}
LIBYUV_API
@@ -649,6 +791,8 @@ int I422ToNV21(const uint8_t* src_y,
// Allocate u and v buffers
align_buffer_64(plane_u, halfwidth * halfheight * 2);
uint8_t* plane_v = plane_u + halfwidth * halfheight;
+ if (!plane_u)
+ return 1;
I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width,
@@ -713,6 +857,112 @@ int MM21ToI420(const uint8_t* src_y,
return 0;
}
+LIBYUV_API
+int MM21ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height) {
+ if (!src_y || !src_uv || !dst_yuy2 || width <= 0) {
+ return -1;
+ }
+
+ DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2,
+ dst_stride_yuy2, width, height, 32);
+
+ return 0;
+}
+
+// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
+// documentation.
+// TODO(greenjustin): Add an MT2T to I420 conversion.
+LIBYUV_API
+int MT2TToP010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ if (width <= 0 || !height || !src_uv || !dst_uv) {
+ return -1;
+ }
+
+ {
+ int uv_width = (width + 1) & ~1;
+ int uv_height = (height + 1) / 2;
+ int y = 0;
+ const int tile_width = 16;
+ const int y_tile_height = 32;
+ const int uv_tile_height = 16;
+ int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
+ int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
+ int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
+ size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
+ void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
+ UnpackMT2T_C;
+ align_buffer_64(row_buf, row_buf_size);
+ if (!row_buf)
+ return 1;
+
+#if defined(HAS_UNPACKMT2T_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ UnpackMT2T = UnpackMT2T_NEON;
+ }
+#endif
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ uv_height = (height + 1) / 2;
+ if (dst_y) {
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
+ dst_stride_uv = -dst_stride_uv;
+ }
+
+ // Unpack and detile Y in rows of tiles
+ if (src_y && dst_y) {
+ for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
+ UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+ width, y_tile_height, y_tile_height);
+ src_y += src_stride_y * y_tile_height;
+ dst_y += dst_stride_y * y_tile_height;
+ }
+ if (height & (y_tile_height - 1)) {
+ UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+ width, height & (y_tile_height - 1), y_tile_height);
+ }
+ }
+
+ // Unpack and detile UV plane
+ for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
+ UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+ uv_width, uv_tile_height, uv_tile_height);
+ src_uv += src_stride_uv * uv_tile_height;
+ dst_uv += dst_stride_uv * uv_tile_height;
+ }
+ if (uv_height & (uv_tile_height - 1)) {
+ UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+ uv_width, uv_height & (uv_tile_height - 1),
+ uv_tile_height);
+ }
+ free_aligned_buffer_64(row_buf);
+ }
+ return 0;
+}
+
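For orientation, a hedged usage sketch of the new MT2TToP010 entry point. The wrapper name and stride choices are illustrative, the declaration is assumed to be exported via libyuv/convert.h alongside the other converters, and strides for the 16-bit destination planes are in uint16_t elements, matching how the function indexes them above:

  #include <stdint.h>
  #include <stdlib.h>
  #include "libyuv/convert.h"

  /* Hypothetical wrapper: unpack one tiled 10-bit MT2T frame into P010. */
  int DecodeMt2tFrame(const uint8_t* src_y, int src_stride_y,
                      const uint8_t* src_uv, int src_stride_uv,
                      int width, int height) {
    uint16_t* dst_y =
        (uint16_t*)malloc((size_t)width * height * sizeof(uint16_t));
    uint16_t* dst_uv =
        (uint16_t*)malloc((size_t)width * ((height + 1) / 2) * sizeof(uint16_t));
    if (!dst_y || !dst_uv) {
      free(dst_y);
      free(dst_uv);
      return 1;
    }
    int r = MT2TToP010(src_y, src_stride_y, src_uv, src_stride_uv,
                       dst_y, width,   /* Y stride, in elements */
                       dst_uv, width,  /* interleaved UV, half height */
                       width, height);
    /* ... consume the P010 planes ... */
    free(dst_y);
    free(dst_uv);
    return r;
  }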
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height
@@ -734,7 +984,7 @@ int I422ToNV21(const uint8_t* src_y,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
int halfwidth = (width + 1) >> 1;
@@ -764,11 +1014,19 @@ int I422ToNV21(const uint8_t* src_y,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
@@ -793,6 +1051,11 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow = MergeUVRow_RVV;
+ }
+#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -833,6 +1096,11 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height);
@@ -842,6 +1110,8 @@ int I422ToNV21(const uint8_t* src_y,
int awidth = halfwidth * 2;
align_buffer_64(row_vu_0, awidth * 2);
uint8_t* row_vu_1 = row_vu_0 + awidth;
+ if (!row_vu_0)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
MergeUVRow(src_v, src_u, row_vu_0, halfwidth);
@@ -1080,18 +1350,22 @@ int NV12ToNV24(const uint8_t* src_y,
int dst_stride_uv,
int width,
int height) {
+ int r;
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
- SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
- Abs(height), kFilterBilinear);
- return 0;
+ r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+ SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+ Abs(height), kFilterBilinear);
+ return r;
}
LIBYUV_API
@@ -1105,20 +1379,88 @@ int NV16ToNV24(const uint8_t* src_y,
int dst_stride_uv,
int width,
int height) {
+ int r;
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
- dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+ r = UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+ dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+ return r;
+}
+
+// Any P[420]1[02] to I[420]1[02] format with mirroring.
+static int PxxxToIxxx(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int subsample_x,
+ int subsample_y,
+ int depth) {
+ const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
+ const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
+ depth);
+ SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, uv_width, uv_height, depth);
return 0;
}
LIBYUV_API
+int P010ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height, 1, 1, 10);
+}
+
+LIBYUV_API
+int P012ToI012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height, 1, 1, 12);
+}
+
+LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
@@ -1129,18 +1471,22 @@ int P010ToP410(const uint16_t* src_y,
int dst_stride_uv,
int width,
int height) {
+ int r;
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
- SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
- Abs(height), kFilterBilinear);
- return 0;
+ r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1),
+ SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width),
+ Abs(height), kFilterBilinear);
+ return r;
}
LIBYUV_API
@@ -1154,17 +1500,21 @@ int P210ToP410(const uint16_t* src_y,
int dst_stride_uv,
int width,
int height) {
+ int r;
if (width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
- dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
- return 0;
+ r = UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv,
+ dst_stride_uv, Abs(width), Abs(height), kFilterBilinear);
+ return r;
}
// Convert YUY2 to I420.
@@ -1231,6 +1581,16 @@ int YUY2ToI420(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -1322,6 +1682,26 @@ int UYVYToI420(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ UYVYToUVRow = UYVYToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ UYVYToUVRow = UYVYToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_UYVYTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
UYVYToYRow = UYVYToYRow_Any_LASX;
@@ -1574,6 +1954,176 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ARGBToYRow = ARGBToYRow_Any_LASX;
+ ARGBToUVRow = ARGBToUVRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_LASX;
+ ARGBToUVRow = ARGBToUVRow_LASX;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ }
+ return 0;
+}
+
+#ifdef USE_EXTRACTALPHA
+// Convert ARGB to I420 with Alpha
+// The following version calls ARGBExtractAlpha on the full image.
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height);
+ if (r == 0) {
+ r = ARGBExtractAlpha(src_argb, src_stride_argb, dst_a, dst_stride_a, width,
+ height);
+ }
+ return r;
+}
+#else // USE_EXTRACTALPHA
+// Convert ARGB to I420 with Alpha
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
+ int width) = ARGBExtractAlphaRow_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1584,22 +2134,63 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
+ : ARGBExtractAlphaRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
+ : ARGBExtractAlphaRow_Any_AVX2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
+ : ARGBExtractAlphaRow_Any_NEON;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
+ : ARGBExtractAlphaRow_Any_MSA;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
+ : ARGBExtractAlphaRow_Any_LSX;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
+ ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a,
+ width);
src_argb += src_stride_argb * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
+ dst_a += dst_stride_a * 2;
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
}
return 0;
}
+#endif // USE_EXTRACTALPHA
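A hedged usage sketch of the converter added above; the caller name and buffer layout are illustrative, and the declaration is assumed to be exported via libyuv/convert.h. The alpha plane is full resolution like Y, while U and V are quarter size per I420:

  #include <stdint.h>
  #include <stdlib.h>
  #include "libyuv/convert.h"

  /* Hypothetical caller: ARGB -> I420 plus a separate alpha plane. */
  int ConvertWithAlpha(const uint8_t* argb, int argb_stride, int w, int h) {
    int hw = (w + 1) / 2;
    int hh = (h + 1) / 2;
    uint8_t* y = (uint8_t*)malloc((size_t)w * h);
    uint8_t* u = (uint8_t*)malloc((size_t)hw * hh);
    uint8_t* v = (uint8_t*)malloc((size_t)hw * hh);
    uint8_t* a = (uint8_t*)malloc((size_t)w * h);
    if (!y || !u || !v || !a) {
      free(y); free(u); free(v); free(a);
      return 1;
    }
    int r = ARGBToI420Alpha(argb, argb_stride, y, w, u, hw, v, hw, a, w, w, h);
    /* ... use the four planes ... */
    free(y); free(u); free(v); free(a);
    return r;
  }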
// Convert BGRA to I420.
LIBYUV_API
@@ -1628,16 +2219,6 @@ int BGRAToI420(const uint8_t* src_bgra,
src_bgra = src_bgra + (height - 1) * src_stride_bgra;
src_stride_bgra = -src_stride_bgra;
}
-#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
- BGRAToYRow = BGRAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_SSSE3;
- BGRAToYRow = BGRAToYRow_SSSE3;
- }
- }
-#endif
#if defined(HAS_BGRATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
BGRAToYRow = BGRAToYRow_Any_NEON;
@@ -1654,12 +2235,46 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_BGRATOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BGRAToYRow = BGRAToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToYRow = BGRAToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToUVRow = BGRAToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BGRAToYRow = BGRAToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToYRow = BGRAToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BGRAToUVRow = BGRAToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToUVRow = BGRAToUVRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
BGRAToYRow = BGRAToYRow_Any_MSA;
BGRAToUVRow = BGRAToUVRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
BGRAToYRow = BGRAToYRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
BGRAToUVRow = BGRAToUVRow_MSA;
}
}
@@ -1674,6 +2289,19 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_BGRATOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ BGRAToYRow = BGRAToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToYRow = BGRAToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ BGRAToYRow = BGRAToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
@@ -1786,6 +2414,19 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@@ -1882,6 +2523,19 @@ int RGBAToI420(const uint8_t* src_rgba,
}
}
#endif
+#if defined(HAS_RGBATOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGBAToYRow = RGBAToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYRow = RGBAToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToYRow = RGBAToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@@ -1901,7 +2555,7 @@ int RGBAToI420(const uint8_t* src_rgba,
// Enabled if 1 pass is available
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_LSX))
+ defined(HAS_RGB24TOYROW_LSX) || defined(HAS_RGB24TOYROW_RVV))
#define HAS_RGB24TOYROW
#endif
@@ -1986,6 +2640,11 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYRow = RGB24ToYRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYROW
@@ -2035,8 +2694,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
{
#if !defined(HAS_RGB24TOYROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2046,10 +2707,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
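
This is the 2-pass fallback: when no 1-pass RGB24 kernel exists, each pair of source rows is expanded into the ARGB scratch rows, and the ARGB kernels then derive two Y rows plus one shared UV row (ARGBToUVRow averages vertically across its stride argument, row_size here). A condensed sketch of that shape, with hypothetical stand-ins for the row kernels:

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for the libyuv row kernels (assumptions).
static void ToARGBRow(const uint8_t*, uint8_t*, int) {}
static void ARGBToYRowStub(const uint8_t*, uint8_t*, int) {}
static void ARGBToUVRowStub(const uint8_t*, int, uint8_t*, uint8_t*, int) {}

// Convert two packed RGB24 rows to one Y row each and one shared UV row.
static void TwoPassPair(const uint8_t* src, int src_stride,
                        uint8_t* dst_y, int dst_stride_y,
                        uint8_t* dst_u, uint8_t* dst_v, int width) {
  const int row_size = (width * 4 + 31) & ~31;  // padded ARGB row
  std::vector<uint8_t> row(row_size * 2);       // 2 scratch rows of ARGB
  ToARGBRow(src, row.data(), width);
  ToARGBRow(src + src_stride, row.data() + row_size, width);
  ARGBToUVRowStub(row.data(), row_size, dst_u, dst_v, width);  // averages rows
  ARGBToYRowStub(row.data(), dst_y, width);
  ARGBToYRowStub(row.data() + row_size, dst_y + dst_stride_y, width);
}

int main() {
  const int w = 64;
  std::vector<uint8_t> src(w * 3 * 2), y(w * 2), u(w / 2), v(w / 2);
  TwoPassPair(src.data(), w * 3, y.data(), w, u.data(), v.data(), w);
  return 0;
}
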
@@ -2075,7 +2736,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#undef HAS_RGB24TOYROW
// Enabled if 1 pass is available
-#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA)
+#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_RVV)
#define HAS_RGB24TOYJROW
#endif
@@ -2140,6 +2802,27 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGB24ToYJRow = RGB24ToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYJRow = RGB24ToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYJROW
@@ -2189,8 +2872,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
{
#if !defined(HAS_RGB24TOYJROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2200,10 +2885,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
+ ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
@@ -2230,7 +2915,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
// Enabled if 1 pass is available
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_LSX))
+ defined(HAS_RAWTOYROW_LSX) || defined(HAS_RAWTOYROW_RVV))
#define HAS_RAWTOYROW
#endif
@@ -2314,6 +2999,11 @@ int RAWToI420(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYRow = RAWToYRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYROW
@@ -2363,8 +3053,10 @@ int RAWToI420(const uint8_t* src_raw,
{
#if !defined(HAS_RAWTOYROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2374,10 +3066,10 @@ int RAWToI420(const uint8_t* src_raw,
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2403,7 +3095,8 @@ int RAWToI420(const uint8_t* src_raw,
#undef HAS_RAWTOYROW
// Enabled if 1 pass is available
-#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA)
+#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
+ defined(HAS_RAWTOYJROW_RVV)
#define HAS_RAWTOYJROW
#endif
@@ -2468,6 +3161,27 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@@ -2517,8 +3231,10 @@ int RAWToJ420(const uint8_t* src_raw,
{
#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2528,10 +3244,10 @@ int RAWToJ420(const uint8_t* src_raw,
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2695,8 +3411,10 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -2706,10 +3424,10 @@ int RGB565ToI420(const uint8_t* src_rgb565,
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
#else
RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb565 += src_stride_rgb565 * 2;
dst_y += dst_stride_y * 2;
@@ -2875,8 +3593,10 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2888,11 +3608,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
width);
#else
ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+ ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size,
width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_argb1555 += src_stride_argb1555 * 2;
dst_y += dst_stride_y * 2;
@@ -3055,6 +3775,24 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -3070,8 +3808,10 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
{
#if !(defined(HAS_ARGB4444TOYROW_NEON))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -3082,11 +3822,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
width);
#else
ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+ ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size,
width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_argb4444 += src_stride_argb4444 * 2;
dst_y += dst_stride_y * 2;
@@ -3167,6 +3907,27 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGB24ToYJRow = RGB24ToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYJRow = RGB24ToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RGB24ToYJRow(src_rgb24, dst_yj, width);
@@ -3235,6 +3996,27 @@ int RAWToJ400(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToYJRow(src_raw, dst_yj, width);
diff --git a/files/source/convert_argb.cc b/source/convert_argb.cc
index 71ef8c10..871fea59 100644
--- a/files/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -7,8 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+
#include "libyuv/convert_argb.h"
+#include <assert.h>
+
+#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
@@ -65,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
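
The assert(yuvconstants) added throughout this file separates two failure classes: null planes or bad dimensions are runtime data and still return -1, while a null conversion matrix is a caller bug that now trips in debug builds instead of being dereferenced deep in a row loop. A sketch of that validation order (the function and struct names are hypothetical):

#include <cassert>
#include <cstdint>

struct YuvConstantsSketch { int coeffs; };

int ConvertSketch(const uint8_t* src, uint8_t* dst,
                  const YuvConstantsSketch* yuvconstants,
                  int width, int height) {
  assert(yuvconstants);  // programmer error: fail loudly in debug builds
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;           // recoverable input error: report to the caller
  }
  (void)yuvconstants;
  return 0;
}

int main() {
  uint8_t a[4] = {0}, b[4];
  YuvConstantsSketch c{0};
  return ConvertSketch(a, b, &c, 1, 1);  // valid call returns 0
}
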
@@ -115,6 +120,14 @@ int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -123,6 +136,11 @@ int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -298,6 +316,7 @@ int I422ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -355,6 +374,14 @@ int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -363,6 +390,11 @@ int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -536,6 +568,7 @@ int I444ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -592,6 +625,11 @@ int I444ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -747,6 +785,133 @@ int U444ToABGR(const uint8_t* src_y,
width, height);
}
+// Convert I444 to RGB24 with matrix.
+LIBYUV_API
+int I444ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width &&
+ dst_stride_rgb24 == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_LSX;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
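
The negative-height handling above is the library-wide convention for vertical flips: retarget the destination at its last row and negate the stride, so the unchanged top-to-bottom loop writes rows bottom-up with no extra copies. A standalone sketch of the pointer trick (helper name hypothetical):

#include <cassert>
#include <cstdint>

// Writes the row index into every byte of each row; a negative height
// inverts the output exactly as the converters above do.
static void FillRows(uint8_t* dst, int dst_stride, int width, int height) {
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;  // start at the last row
    dst_stride = -dst_stride;               // walk upward
  }
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) dst[x] = (uint8_t)y;
    dst += dst_stride;
  }
}

int main() {
  uint8_t img[4 * 3];
  FillRows(img, 4, 4, -3);              // height -3 requests an inverted image
  assert(img[0] == 2 && img[8] == 0);   // first source row landed last
  return 0;
}
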
+// Convert I444 to RGB24.
+LIBYUV_API
+int I444ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I444 to RAW.
+LIBYUV_API
+int I444ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I444ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
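
I444ToRAW needs no kernel of its own: RAW is RGB24 with R and B exchanged, and that exchange falls out of passing the U and V planes in reverse order together with the YVU variant of the BT.601 matrix, whose chroma coefficients are rearranged to match the swapped plane order. A toy model of why the two swaps compose into a pure channel swap; the coefficients are illustrative, not BT.601, and the real kYvuI601Constants follows the same construction idea:

#include <cassert>

struct Mat { int ru, rv, gu, gv, bu, bv; };  // per-channel chroma weights

static void Convert(int y, int u, int v, const Mat& m, int out[3]) {
  out[0] = y + m.ru * u + m.rv * v;  // "R"
  out[1] = y + m.gu * u + m.gv * v;  // "G"
  out[2] = y + m.bu * u + m.bv * v;  // "B"
}

int main() {
  const Mat yuv{0, 2, -1, -1, 3, 0};  // RGB-order matrix
  // Companion matrix: U/V columns exchanged and R/B rows exchanged.
  // Swapped plane order undoes the column exchange, leaving an R/B swap.
  const Mat yvu{0, 3, -1, -1, 2, 0};
  int rgb[3], raw[3];
  Convert(16, 5, 7, yuv, rgb);        // planes in normal order
  Convert(16, 7, 5, yvu, raw);        // planes swapped + YVU matrix
  assert(raw[0] == rgb[2] && raw[1] == rgb[1] && raw[2] == rgb[0]);  // R<->B
  return 0;
}
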
// Convert 10 bit YUV to ARGB with matrix.
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
// multiply 10 bit yuv into high bits to allow any number of bits.
@@ -767,6 +932,7 @@ int I010ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -926,6 +1092,7 @@ int I012ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I212ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -983,6 +1150,7 @@ int I210ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1137,6 +1305,7 @@ int I410ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1190,6 +1359,7 @@ int I010ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1353,6 +1523,7 @@ int I012ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I212ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1408,6 +1579,7 @@ int I210ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1568,6 +1740,7 @@ int I410ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1617,6 +1790,7 @@ int P010ToARGBMatrix(const uint16_t* src_y,
void (*P210ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1667,6 +1841,7 @@ int P210ToARGBMatrix(const uint16_t* src_y,
void (*P210ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1715,6 +1890,7 @@ int P010ToAR30Matrix(const uint16_t* src_y,
void (*P210ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1765,6 +1941,7 @@ int P210ToAR30Matrix(const uint16_t* src_y,
void (*P210ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1823,6 +2000,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -1865,6 +2043,14 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -1873,6 +2059,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -1905,6 +2096,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -1947,6 +2143,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -1989,6 +2186,14 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -1997,6 +2202,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -2029,6 +2239,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2069,6 +2284,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2111,6 +2327,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -2143,6 +2364,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I444AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2312,6 +2538,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I210AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2370,6 +2597,11 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2412,6 +2644,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I210AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2470,6 +2703,11 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2510,6 +2748,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2568,6 +2807,11 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I410AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2597,6 +2841,7 @@ int I400ToARGBMatrix(const uint8_t* src_y,
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I400ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -2652,6 +2897,11 @@ int I400ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I400TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I400ToARGBRow = I400ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
@@ -2739,6 +2989,12 @@ int J400ToARGB(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_J400TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ J400ToARGBRow = J400ToARGBRow_RVV;
+ }
+#endif
+
for (y = 0; y < height; ++y) {
J400ToARGBRow(src_y, dst_argb, width);
src_y += src_stride_y;
@@ -2747,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y,
return 0;
}
+#ifndef __riscv
// Shuffle table for converting BGRA to ARGB.
static const uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
@@ -2834,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64,
return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
(const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
}
+#else
+// Convert BGRA to ARGB (same as ARGBToBGRA).
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+// Convert ARGB to BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) =
+ ARGBToBGRARow_C;
+ if (!src_argb || !dst_bgra || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_bgra = 0;
+ }
+
+#if defined(HAS_ARGBTOBGRAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToBGRARow = ARGBToBGRARow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToBGRARow(src_argb, dst_bgra, width);
+ src_argb += src_stride_argb;
+ dst_bgra += dst_stride_bgra;
+ }
+ return 0;
+}
+
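
The coalesce-rows block above is a small but recurring optimization: when source and destination strides equal the packed row width, the frame is one contiguous block, so the per-row loop collapses to a single kernel call over width * height pixels with zero strides. A standalone sketch counting kernel invocations (function name hypothetical):

#include <cassert>

// Returns how many row-kernel calls a frame needs after coalescing.
static long RowCalls(int width, int height, int src_stride, int dst_stride) {
  if (src_stride == width * 4 && dst_stride == width * 4) {
    width *= height;  // treat the whole frame as one very wide row
    height = 1;
    src_stride = dst_stride = 0;
  }
  (void)src_stride;
  (void)dst_stride;  // the kernels would receive these
  long calls = 0;
  for (int y = 0; y < height; ++y) ++calls;  // one kernel call per row
  return calls;
}

int main() {
  assert(RowCalls(64, 100, 64 * 4, 64 * 4) == 1);    // packed: one big call
  assert(RowCalls(64, 100, 80 * 4, 64 * 4) == 100);  // padded: per-row calls
  return 0;
}
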
+// Convert ARGB to ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) =
+ ARGBToABGRRow_C;
+ if (!src_argb || !dst_abgr || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_abgr = 0;
+ }
+
+#if defined(HAS_ARGBTOABGRROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToABGRRow = ARGBToABGRRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToABGRRow(src_argb, dst_abgr, width);
+ src_argb += src_stride_argb;
+ dst_abgr += dst_stride_abgr;
+ }
+ return 0;
+}
+
+// Convert ABGR to ARGB (same as ARGBToABGR).
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) =
+ RGBAToARGBRow_C;
+ if (!src_rgba || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+ src_stride_rgba = -src_stride_rgba;
+ }
+ // Coalesce rows.
+ if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_rgba = dst_stride_argb = 0;
+ }
+
+#if defined(HAS_RGBATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToARGBRow = RGBAToARGBRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RGBAToARGBRow(src_rgba, dst_argb, width);
+ src_rgba += src_stride_rgba;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert AR64 To AB64.
+LIBYUV_API
+int AR64ToAB64(const uint16_t* src_ar64,
+ int src_stride_ar64,
+ uint16_t* dst_ab64,
+ int dst_stride_ab64,
+ int width,
+ int height) {
+ int y;
+ void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64,
+ int width) = AR64ToAB64Row_C;
+ if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
+ src_stride_ar64 = -src_stride_ar64;
+ }
+ // Coalesce rows.
+ if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar64 = dst_stride_ab64 = 0;
+ }
+
+#if defined(HAS_AR64TOAB64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AR64ToAB64Row = AR64ToAB64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ AR64ToAB64Row(src_ar64, dst_ab64, width);
+ src_ar64 += src_stride_ar64;
+ dst_ab64 += dst_stride_ab64;
+ }
+ return 0;
+}
+#endif
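
Everything in this #else branch leans on the same fact: each of these channel reorders is a fixed byte permutation per pixel, and a 4-byte reversal is an involution, which is why BGRAToARGB can simply forward to ARGBToBGRA (and ABGRToARGB to ARGBToABGR) instead of carrying its own RVV kernel. The non-RISC-V build gets the same effect from the shuffle tables above (kShuffleMaskBGRAToARGB is 3,2,1,0 repeated). A standalone sketch of the round trip:

#include <cassert>
#include <cstdint>

// Reverse the 4 bytes of each pixel, the scalar equivalent of the
// BGRA<->ARGB shuffle. Applying it twice restores the input.
static void Reverse4(const uint8_t* src, uint8_t* dst, int pixels) {
  for (int i = 0; i < pixels; ++i) {
    dst[4 * i + 0] = src[4 * i + 3];
    dst[4 * i + 1] = src[4 * i + 2];
    dst[4 * i + 2] = src[4 * i + 1];
    dst[4 * i + 3] = src[4 * i + 0];
  }
}

int main() {
  const uint8_t argb[4] = {10, 20, 30, 40};
  uint8_t bgra[4], back[4];
  Reverse4(argb, bgra, 1);  // ARGB -> BGRA
  Reverse4(bgra, back, 1);  // BGRA -> ARGB, same function both ways
  for (int i = 0; i < 4; ++i) assert(back[i] == argb[i]);
  return 0;
}
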
// Convert RGB24 to ARGB.
LIBYUV_API
@@ -2901,6 +3347,11 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
@@ -2976,6 +3427,11 @@ int RAWToARGB(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToARGBRow = RAWToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
@@ -3027,6 +3483,11 @@ int RAWToRGBA(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToRGBARow = RAWToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGBARow(src_raw, dst_rgba, width);
@@ -3431,6 +3892,11 @@ int AR64ToARGB(const uint16_t* src_ar64,
}
}
#endif
+#if defined(HAS_AR64TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AR64ToARGBRow = AR64ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
AR64ToARGBRow(src_ar64, dst_argb, width);
@@ -3490,6 +3956,11 @@ int AB64ToARGB(const uint16_t* src_ab64,
}
}
#endif
+#if defined(HAS_AB64TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AB64ToARGBRow = AB64ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
AB64ToARGBRow(src_ab64, dst_argb, width);
@@ -3514,6 +3985,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y,
void (*NV12ToARGBRow)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3571,6 +4043,11 @@ int NV12ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_NV12TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ NV12ToARGBRow = NV12ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
@@ -3598,6 +4075,7 @@ int NV21ToARGBMatrix(const uint8_t* src_y,
void (*NV21ToARGBRow)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3655,6 +4133,11 @@ int NV21ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_NV21TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ NV21ToARGBRow = NV21ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
@@ -3741,6 +4224,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y,
void (*NV12ToRGB24Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -3774,6 +4258,11 @@ int NV12ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_NV12TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ NV12ToRGB24Row = NV12ToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
@@ -3801,6 +4290,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y,
void (*NV21ToRGB24Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -3834,6 +4324,11 @@ int NV21ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_NV21TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ NV21ToRGB24Row = NV21ToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
@@ -4143,6 +4638,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
const ptrdiff_t vu_off = src_v - src_u;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -4174,6 +4670,8 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
// General case fallback creates NV12
align_buffer_64(plane_uv, halfwidth * 2 * halfheight);
+ if (!plane_uv)
+ return 1;
dst_uv = plane_uv;
for (y = 0; y < halfheight; ++y) {
WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth);
@@ -4243,6 +4741,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
return -1;
}
@@ -4284,6 +4783,14 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4292,6 +4799,11 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGBARow = I422ToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
@@ -4354,6 +4866,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y,
void (*NV12ToRGB565Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -4456,6 +4969,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
return -1;
}
@@ -4497,6 +5011,14 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4505,6 +5027,11 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGBARow = I422ToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
@@ -4572,6 +5099,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -4613,6 +5141,14 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
@@ -4621,6 +5157,11 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGB24Row = I422ToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
@@ -4742,6 +5283,134 @@ int H420ToRAW(const uint8_t* src_y,
width, height);
}
+// Convert I422 to RGB24 with matrix.
+LIBYUV_API
+int I422ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGB24Row_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I422TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToRGB24Row = I422ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB24Row = I422ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToRGB24Row = I422ToRGB24Row_LASX;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGB24Row = I422ToRGB24Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
+// Convert I422 to RGB24.
+LIBYUV_API
+int I422ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I422 to RAW.
+LIBYUV_API
+int I422ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
// Convert I420 to ARGB1555.
LIBYUV_API
int I420ToARGB1555(const uint8_t* src_y,
@@ -4801,6 +5470,14 @@ int I420ToARGB1555(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB1555ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB1555ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_LASX;
@@ -4882,6 +5559,14 @@ int I420ToARGB4444(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB4444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB4444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_LASX;
@@ -4922,6 +5607,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -4963,6 +5649,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5035,23 +5729,25 @@ int H420ToRGB565(const uint8_t* src_y,
&kYuvH709Constants, width, height);
}
-// Convert I422 to RGB565.
+// Convert I422 to RGB565 with specified color matrix.
LIBYUV_API
-int I422ToRGB565(const uint8_t* src_y,
- int src_stride_y,
- const uint8_t* src_u,
- int src_stride_u,
- const uint8_t* src_v,
- int src_stride_v,
- uint8_t* dst_rgb565,
- int dst_stride_rgb565,
- int width,
- int height) {
+int I422ToRGB565Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
int y;
void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -5093,6 +5789,14 @@ int I422ToRGB565(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5103,7 +5807,7 @@ int I422ToRGB565(const uint8_t* src_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -5112,6 +5816,23 @@ int I422ToRGB565(const uint8_t* src_y,
return 0;
}
+// Convert I422 to RGB565.
+LIBYUV_API
+int I422ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ return I422ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb565, dst_stride_rgb565,
+ &kYuvI601Constants, width, height);
+}
+
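
With I422ToRGB565 reduced to a thin wrapper, callers that previously had only the BT.601 default can now select a colorspace explicitly. A usage sketch, assuming the libyuv headers and library are available; kYuvH709Constants is the BT.709 matrix already used elsewhere in this file:

#include <cstdint>
#include "libyuv.h"

// Convert an I422 frame to RGB565 with BT.709 (HD) constants instead of
// the BT.601 default baked into the I422ToRGB565 wrapper.
int ConvertH422ToRGB565(const uint8_t* y, int y_stride,
                        const uint8_t* u, int u_stride,
                        const uint8_t* v, int v_stride,
                        uint8_t* dst, int dst_stride,
                        int width, int height) {
  return libyuv::I422ToRGB565Matrix(y, y_stride, u, u_stride, v, v_stride,
                                    dst, dst_stride,
                                    &libyuv::kYuvH709Constants, width, height);
}
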
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
static const uint8_t kDither565_4x4[16] = {
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
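
Each output row of the dithered path selects one 4-entry line of this table (by y & 3) and adds the 0..7 threshold to each channel before the 8-bit values are truncated to 5/6/5 bits, trading banding for spatially spread noise. A sketch of the per-pixel effect; the real row functions receive the table line packed into the uint32_t dither4 argument, so the direct indexing here is a simplification:

#include <cassert>
#include <cstdint>

static const uint8_t kDitherSketch4x4[16] = {
    0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
};

// Add the ordered-dither threshold, clamp, then truncate 888 to 565.
static uint16_t DitherTo565(int r, int g, int b, int dither) {
  r += dither; if (r > 255) r = 255;
  g += dither; if (g > 255) g = 255;
  b += dither; if (b > 255) b = 255;
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}

int main() {
  const int x = 2, y = 5;                  // pixel position in the frame
  const int d = kDitherSketch4x4[((y & 3) << 2) | (x & 3)];
  assert(d <= 7);
  const uint16_t px = DitherTo565(200, 100, 50, d);
  assert((px >> 11) == ((200 + d) >> 3));  // red landed in the top 5 bits
  return 0;
}
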
@@ -5136,7 +5857,7 @@ int I420ToRGB565Dither(const uint8_t* src_y,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
- const uint32_t dither4, int width) =
+ uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -5191,6 +5912,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -5199,6 +5928,11 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
@@ -5231,6 +5965,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX;
@@ -5242,6 +5984,8 @@ int I420ToRGB565Dither(const uint8_t* src_y,
{
// Allocate a row of argb.
align_buffer_64(row_argb, width * 4);
+ if (!row_argb)
+ return 1;
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
@@ -5278,6 +6022,7 @@ int I420ToAR30Matrix(const uint8_t* src_y,
const struct YuvConstants* yuvconstants, int width) =
I422ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5401,9 +6146,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
- void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
- uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_Any_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5453,48 +6201,65 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
}
#endif
// Allocate 4 temporary rows: 2 for U, 2 for V.
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row;
- uint8_t* temp_u_2 = row + kRowSize;
- uint8_t* temp_v_1 = row + kRowSize * 2;
- uint8_t* temp_v_2 = row + kRowSize * 3;
-
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5506,8 +6271,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
}
@@ -5531,8 +6296,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
- void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
- ScaleRowUp2_Linear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5582,36 +6348,48 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
}
#endif
// Allocate 2 temporary rows: 1 for U, 1 for V.
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row;
- uint8_t* temp_v = row + kRowSize;
+ uint8_t* temp_v = row + row_size;
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5623,6 +6401,156 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
return 0;
}
+static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_LASX;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+ }
+#endif
+
+ // alloc 4 lines temp
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
+ uint8_t* temp_u_1 = row;
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+
+ for (y = 0; y < height - 2; y += 2) {
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+
+ if (!(height & 1)) {
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ }
+
+ free_aligned_buffer_64(row);
+ return 0;
+}
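
The loop structure above is the template for every *Bilinear helper in this change: the first output row (and the last when height is even) has only one source chroma row available, so it falls back to the horizontal-only linear kernel, while each interior source row pair produces two bilinear output rows through temp_u_1/temp_u_2. A reference sketch of the 2x2 interpolation itself, assuming the conventional (9,3,3,1)/16 taps (illustrative C, not the SIMD kernels dispatched above):

#include <stdint.h>

// One source chroma row pair (r0 above r1) yields two output rows.
// The output row nearer r0 weights it 3:1 vertically; combined with
// the 3:1 horizontal phase this gives the (9,3,3,1)/16 taps below.
// Edge columns are omitted for brevity.
static void BilinearUp2x2(const uint8_t* r0, const uint8_t* r1,
                          uint8_t* d0, uint8_t* d1, int src_width) {
  int x;
  for (x = 0; x < src_width - 1; ++x) {
    int a = r0[x], b = r0[x + 1];
    int c = r1[x], e = r1[x + 1];
    d0[2 * x + 0] = (uint8_t)((9 * a + 3 * b + 3 * c + e + 8) >> 4);
    d0[2 * x + 1] = (uint8_t)((3 * a + 9 * b + c + 3 * e + 8) >> 4);
    d1[2 * x + 0] = (uint8_t)((3 * a + b + 9 * c + 3 * e + 8) >> 4);
    d1[2 * x + 1] = (uint8_t)((a + 3 * b + 3 * c + 9 * e + 8) >> 4);
  }
}
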
+
static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -5639,9 +6567,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5668,41 +6599,46 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
-
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -5714,8 +6650,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
}
@@ -5740,8 +6676,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5770,29 +6707,31 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v, width);
I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -5819,9 +6758,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5848,41 +6790,46 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
-
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5894,8 +6841,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
}
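
One plausible reading of the `_12` suffix on these kernels (an inference; nothing in this diff states it): with 16-bit accumulators the bilinear taps only leave headroom for 12-bit samples, so 10- and 12-bit planes share the `_12` family while full 16-bit data needs the separate `_16` kernels.

/* The bilinear taps sum to 16, so the pre-shift accumulator peaks at
 * 16 * max_sample + 8 (rounding term):
 *   12-bit input: 16 * 4095 + 8 = 65528  -- still fits in 16 bits
 *   16-bit input: 16 * 65535 + 8         -- overflows a 16-bit register
 */
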
@@ -5919,8 +6866,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5949,29 +6897,31 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v, width);
I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6006,9 +6956,12 @@ static int I420AlphaToARGBMatrixBilinear(
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
- uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_Any_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6059,6 +7012,11 @@ static int I420AlphaToARGBMatrixBilinear(
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -6091,40 +7049,58 @@ static int I420AlphaToARGBMatrixBilinear(
}
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row;
- uint8_t* temp_u_2 = row + kRowSize;
- uint8_t* temp_v_1 = row + kRowSize * 2;
- uint8_t* temp_v_2 = row + kRowSize * 3;
-
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6135,8 +7111,8 @@ static int I420AlphaToARGBMatrixBilinear(
src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6158,8 +7134,8 @@ static int I420AlphaToARGBMatrixBilinear(
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6193,8 +7169,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
- ScaleRowUp2_Linear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6245,6 +7222,11 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -6277,36 +7259,49 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
+
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row;
- uint8_t* temp_v = row + kRowSize;
+ uint8_t* temp_v = row + row_size;
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6346,9 +7341,12 @@ static int I010AlphaToARGBMatrixBilinear(
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6407,35 +7405,45 @@ static int I010AlphaToARGBMatrixBilinear(
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
-
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
+ if (!row)
+ return 1;
+
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6446,8 +7454,8 @@ static int I010AlphaToARGBMatrixBilinear(
src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6469,8 +7477,8 @@ static int I010AlphaToARGBMatrixBilinear(
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6504,8 +7512,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6564,32 +7573,39 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6618,9 +7634,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
void (*P410ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleUVRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_16)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -6649,35 +7666,37 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row);
- uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+ if (!row)
+ return 1;
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6688,7 +7707,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
}
@@ -6709,8 +7728,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
void (*P410ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C;
- void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
- ScaleUVRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv,
+ int dst_width) = ScaleUVRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -6739,28 +7759,30 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row);
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_uv, temp_uv, width);
+ ScaleRowUp2_Linear(src_uv, temp_uv, width);
P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6784,9 +7806,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
void (*P410ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleUVRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_16)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -6815,35 +7838,37 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row);
- uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
+ if (!row)
+ return 1;
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -6854,7 +7879,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
}
@@ -6875,8 +7900,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
void (*P410ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C;
- void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
- ScaleUVRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv,
+ int dst_width) = ScaleUVRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -6905,28 +7931,30 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row);
+ if (!row)
+ return 1;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_uv, temp_uv, width);
+ ScaleRowUp2_Linear(src_uv, temp_uv, width);
P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -6937,6 +7965,140 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
return 0;
}
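
Unlike the planar I210 path above it, P210's chroma is interleaved, so ScaleUVRowUp2_Linear_16 widens U and V in one pass over the packed pairs. A hedged sketch of those semantics (reference C with simplified edge handling, not the dispatched SIMD code):

#include <stdint.h>

// 2x horizontal upsample of interleaved 16-bit UV pairs: src holds
// src_pairs UV pairs, dst receives 2 * src_pairs pairs.
static void UVLinearUp2_16(const uint16_t* src, uint16_t* dst,
                           int src_pairs) {
  int x, c;
  for (x = 0; x < src_pairs - 1; ++x) {
    for (c = 0; c < 2; ++c) {  // c = 0: U, c = 1: V
      int a = src[2 * x + c];
      int b = src[2 * (x + 1) + c];
      dst[4 * x + c] = (uint16_t)((3 * a + b + 2) >> 2);
      dst[4 * x + 2 + c] = (uint16_t)((a + 3 * b + 2) >> 2);
    }
  }
  // Replicate the final source pair into the last two output pairs.
  dst[4 * (src_pairs - 1) + 0] = src[2 * (src_pairs - 1) + 0];
  dst[4 * (src_pairs - 1) + 1] = src[2 * (src_pairs - 1) + 1];
  dst[4 * (src_pairs - 1) + 2] = src[2 * (src_pairs - 1) + 0];
  dst[4 * (src_pairs - 1) + 3] = src[2 * (src_pairs - 1) + 1];
}
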
+static int I422ToRGB24MatrixLinear(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
+ }
+#endif
+
+ // alloc 2 lines temp
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ uint8_t* temp_u = row;
+ uint8_t* temp_v = row + row_size;
+ if (!row)
+ return 1;
+
+ for (y = 0; y < height; ++y) {
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
+ I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+
+ free_aligned_buffer_64(row);
+ return 0;
+}
+
+LIBYUV_API
+int I422ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter) {
+ switch (filter) {
+ case kFilterNone:
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ yuvconstants, width, height);
+ case kFilterBilinear:
+ case kFilterBox:
+ case kFilterLinear:
+ return I422ToRGB24MatrixLinear(
+ src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
+ dst_rgb24, dst_stride_rgb24, yuvconstants, width, height);
+ }
+
+ return -1;
+}
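
A minimal caller sketch for the new entry point (the header path, plane layout, and use of kYuvI601Constants are illustrative assumptions):

#include "libyuv/convert_argb.h"  // assumed home of the Matrix/Filter APIs

// I422: full-height chroma planes at half width; RGB24 = 3 bytes/pixel.
int ConvertI422FrameToRGB24(const uint8_t* y, const uint8_t* u,
                            const uint8_t* v, int width, int height,
                            uint8_t* rgb24) {
  int half_width = (width + 1) / 2;
  return I422ToRGB24MatrixFilter(y, width,          // Y plane and stride
                                 u, half_width,     // U plane and stride
                                 v, half_width,     // V plane and stride
                                 rgb24, width * 3,  // tightly packed rows
                                 &kYuvI601Constants, width, height,
                                 kFilterLinear);    // -> ...MatrixLinear path
}
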
+
LIBYUV_API
int I420ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
@@ -6998,6 +8160,35 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
}
LIBYUV_API
+int I420ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter) {
+ switch (filter) {
+ case kFilterNone:
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
+ case kFilterBilinear:
+ case kFilterBox:
+ return I420ToRGB24MatrixBilinear(
+ src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
+ dst_rgb24, dst_stride_rgb24, yuvconstants, width, height);
+ }
+
+ return -1;
+}
+
+LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -7015,13 +8206,12 @@ int I010ToAR30MatrixFilter(const uint16_t* src_y,
return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_ar30, dst_stride_ar30,
yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010ToAR30MatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_ar30, dst_stride_ar30, yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
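
The TODO repeated through these dispatchers is easy to sanity-check: with a source stride of 0 the bilinear kernel reads the same row as both of its inputs, and the 2D filter collapses to the 1D linear one.

/* With src_stride == 0, row c aliases a (and e aliases b), so
 *   (9*a + 3*b + 3*c + e + 8) >> 4
 * = (12*a + 4*b + 8) >> 4
 * = (3*a + b + 2) >> 2
 * -- exactly the horizontal-only linear kernel. */
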
@@ -7074,13 +8264,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y,
return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_argb, dst_stride_argb,
yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010ToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_argb, dst_stride_argb, yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7137,14 +8326,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y,
src_v, src_stride_v, src_a, src_stride_a,
dst_argb, dst_stride_argb, yuvconstants,
width, height, attenuate);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I420AlphaToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a,
src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height,
attenuate);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7206,14 +8394,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y,
src_v, src_stride_v, src_a, src_stride_a,
dst_argb, dst_stride_argb, yuvconstants,
width, height, attenuate);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010AlphaToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a,
src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height,
attenuate);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7253,6 +8440,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y,
return -1;
}
+// TODO(fb): Verify this function works correctly. P010 is like NV12 but
+// 10 bit; UV is biplanar.
LIBYUV_API
int P010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
@@ -7269,13 +8458,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y,
return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv,
dst_argb, dst_stride_argb, yuvconstants, width,
height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv,
src_stride_uv, dst_argb, dst_stride_argb,
yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7324,13 +8512,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y,
return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv,
dst_ar30, dst_stride_ar30, yuvconstants, width,
height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv,
src_stride_uv, dst_ar30, dst_stride_ar30,
yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
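
A dispatch convention visible throughout both files: each fixed-width SIMD kernel ships with an Any_ wrapper that tolerates arbitrary widths, the bare kernel is only selected under IS_ALIGNED, and the RVV kernels are assigned unconditionally, consistent with RISC-V vectors being length-agnostic. A rough sketch of the Any_ idea (an illustration, not libyuv's actual ANY macros), for a kernel that consumes 16 pixels per step:

#include <stdint.h>
#include <string.h>

// Fixed-width body: requires width to be a multiple of 16.
void ARGBToYRow_SIMD16(const uint8_t* argb, uint8_t* y, int width);

// Any-width wrapper: run the SIMD body on the aligned prefix, then pad
// the tail into scratch so the same body can finish the remainder.
void ARGBToYRow_Any_SIMD16(const uint8_t* argb, uint8_t* y, int width) {
  int n = width & ~15;
  int r = width - n;
  if (n > 0) {
    ARGBToYRow_SIMD16(argb, y, n);
  }
  if (r > 0) {
    uint8_t temp_in[16 * 4];
    uint8_t temp_out[16];
    memset(temp_in, 0, sizeof(temp_in));
    memcpy(temp_in, argb + 4 * n, 4 * r);  // 4 bytes per ARGB pixel
    ARGBToYRow_SIMD16(temp_in, temp_out, 16);
    memcpy(y + n, temp_out, r);
  }
}
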
diff --git a/files/source/convert_from.cc b/source/convert_from.cc
index 8bd07e4c..e69da9e9 100644
--- a/files/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -52,19 +52,26 @@ static int I420ToI4xx(const uint8_t* src_y,
const int dst_y_height = Abs(src_y_height);
const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
+ int r;
if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
dst_uv_height <= 0) {
return -1;
}
if (dst_y) {
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
- dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ r = ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
+ dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
- dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
- dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
- return 0;
+ r = ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
+ dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
+ dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
+ return r;
}
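
Two details the reworked plane-scaling bodies lean on: SUBSAMPLE rounds chroma dimensions up so odd sizes keep their last column and row, and ScalePlane/ScalePlane_12 now return int, apparently so internal allocation failures can propagate through `r` instead of being silently dropped. Worked subsample arithmetic, taking SUBSAMPLE(v, 1, 1) as (v + 1) >> 1 for non-negative v (the usual definition):

/* SUBSAMPLE(v, a, s) ~ (v + a) >> s for v >= 0:
 *   width  = 101 -> chroma width  = (101 + 1) >> 1 = 51
 *   height =  99 -> chroma height = ( 99 + 1) >> 1 = 50
 * Truncating instead (101 >> 1 = 50) would drop the last chroma
 * column of an odd-width image. */
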
// Convert 8 bit YUV to 10 bit.
@@ -223,21 +230,28 @@ int I010ToI410(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
+ int r;
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
- }
- ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
- SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
- Abs(height), kFilterBilinear);
- ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
- SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
- Abs(height), kFilterBilinear);
- return 0;
+ r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ }
+ r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1),
+ SUBSAMPLE(height, 1, 1), dst_u, dst_stride_u, Abs(width),
+ Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1),
+ SUBSAMPLE(height, 1, 1), dst_v, dst_stride_v, Abs(width),
+ Abs(height), kFilterBilinear);
+ return r;
}
// 422 chroma to 444 chroma, 10/12 bit version
@@ -256,19 +270,26 @@ int I210ToI410(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
+ int r;
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
- dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
- ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
- dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
- return 0;
+ r = ScalePlane_12(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+ dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+ dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+ return r;
}
// 422 chroma is 1/2 width, 1x height
@@ -288,19 +309,26 @@ int I422ToI444(const uint8_t* src_y,
int dst_stride_v,
int width,
int height) {
+ int r;
if (width == 0 || height == 0) {
return -1;
}
if (dst_y) {
- ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
- Abs(width), Abs(height), kFilterBilinear);
+ r = ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y,
+ Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
}
- ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
- dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
- ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
- dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
- return 0;
+ r = ScalePlane(src_u, src_stride_u, SUBSAMPLE(width, 1, 1), height, dst_u,
+ dst_stride_u, Abs(width), Abs(height), kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, SUBSAMPLE(width, 1, 1), height, dst_v,
+ dst_stride_v, Abs(width), Abs(height), kFilterBilinear);
+ return r;
}
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
@@ -446,6 +474,14 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -533,6 +569,14 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
@@ -608,6 +652,14 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
diff --git a/files/source/convert_from_argb.cc b/source/convert_from_argb.cc
index e50c2af3..b45de8c8 100644
--- a/files/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOUV444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV444Row = ARGBToUV444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOUV444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LASX;
@@ -116,6 +124,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -124,6 +140,11 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToUV444Row(src_argb, dst_u, dst_v, width);
@@ -230,7 +251,24 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
-
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -241,6 +279,11 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
@@ -340,6 +383,14 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -350,6 +401,11 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -361,11 +417,19 @@ int ARGBToNV12(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -390,10 +454,17 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
     // Allocate 2 rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+ if (!row_u)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -502,6 +573,24 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -512,6 +601,11 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -523,11 +617,19 @@ int ARGBToNV21(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -552,10 +654,17 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
     // Allocate 2 rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+ if (!row_u)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
@@ -663,6 +772,27 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYRow = ABGRToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -674,11 +804,19 @@ int ABGRToNV12(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -703,10 +841,17 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
     // Allocate 2 rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+ if (!row_u)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -815,6 +960,27 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYRow = ABGRToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -826,11 +992,19 @@ int ABGRToNV21(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -855,10 +1029,17 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
// Allocate rows of U and V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+ if (!row_u)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
@@ -972,6 +1153,24 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -982,6 +1181,11 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
@@ -1014,6 +1218,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -1028,6 +1240,8 @@ int ARGBToYUY2(const uint8_t* src_argb,
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
uint8_t* row_u = row_y + ((width + 63) & ~63);
uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+ if (!row_y)
+ return 1;
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1135,6 +1349,24 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1145,6 +1377,11 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
@@ -1177,6 +1414,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
@@ -1191,6 +1436,8 @@ int ARGBToUYVY(const uint8_t* src_argb,
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
uint8_t* row_u = row_y + ((width + 63) & ~63);
uint8_t* row_v = row_u + ((width + 63) & ~63) / 2;
+ if (!row_y)
+ return 1;
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
@@ -1262,6 +1509,14 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1270,6 +1525,11 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToYRow(src_argb, dst_y, width);
@@ -1279,6 +1539,7 @@ int ARGBToI400(const uint8_t* src_argb,
return 0;
}
+#ifndef __riscv
// Shuffle table for converting ARGB to RGBA.
static const uvec8 kShuffleMaskARGBToRGBA = {
3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
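// A worked example of the mask (illustrative, not from this patch). libyuv
// "ARGB" is stored B,G,R,A in memory and "RGBA" is stored A,B,G,R; the mask
// lists, per destination byte, which source byte to fetch, so {3,0,1,2}
// moves the alpha byte of each pixel to the front:
//   src bytes: [B G R A] -> dst bytes: [src[3] src[0] src[1] src[2]] = [A B G R]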
@@ -1294,6 +1555,47 @@ int ARGBToRGBA(const uint8_t* src_argb,
return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
(const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
}
+#else
+// Convert ARGB to RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) =
+ ARGBToRGBARow_C;
+ if (!src_argb || !dst_rgba || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_rgba = 0;
+ }
+
+#if defined(HAS_ARGBTORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRGBARow = ARGBToRGBARow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRGBARow(src_argb, dst_rgba, width);
+ src_argb += src_stride_argb;
+ dst_rgba += dst_stride_rgba;
+ }
+ return 0;
+}
+#endif
// Convert ARGB To RGB24.
LIBYUV_API
@@ -1360,6 +1662,14 @@ int ARGBToRGB24(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX;
@@ -1368,6 +1678,11 @@ int ARGBToRGB24(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToRGB24Row(src_argb, dst_rgb24, width);
@@ -1434,6 +1749,14 @@ int ARGBToRAW(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORAWROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRAWRow = ARGBToRAWRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORAWROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRAWRow = ARGBToRAWRow_Any_LASX;
@@ -1442,6 +1765,11 @@ int ARGBToRAW(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORAWROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRAWRow = ARGBToRAWRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToRAWRow(src_argb, dst_raw, width);
@@ -1467,7 +1795,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
int height) {
int y;
void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
- const uint32_t dither4, int width) =
+ uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -1512,6 +1840,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX;
@@ -1589,6 +1925,15 @@ int ARGBToRGB565(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_LSX;
+ }
+ }
+#endif
+
#if defined(HAS_ARGBTORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_LASX;
@@ -1663,6 +2008,14 @@ int ARGBToARGB1555(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOARGB1555ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOARGB1555ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LASX;
@@ -1737,6 +2090,14 @@ int ARGBToARGB4444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOARGB4444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOARGB4444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LASX;
@@ -1858,19 +2219,19 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
- if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -1879,6 +2240,22 @@ int ARGBToJ420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
@@ -1903,19 +2280,11 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
+#if defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
@@ -1951,18 +2320,23 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
- ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
src_argb += src_stride_argb * 2;
dst_yj += dst_stride_yj * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
if (height & 1) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
}
return 0;
@@ -1974,19 +2348,19 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
- if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -1997,10 +2371,10 @@ int ARGBToJ422(const uint8_t* src_argb,
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_yj == width &&
- dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
+ dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
+ src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
@@ -2026,6 +2400,14 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
@@ -2074,270 +2456,649 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
dst_yj += dst_stride_yj;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
return 0;
}
-// Convert ARGB to AR64.
+// Convert ARGB to J400.
LIBYUV_API
-int ARGBToAR64(const uint8_t* src_argb,
+int ARGBToJ400(const uint8_t* src_argb,
int src_stride_argb,
- uint16_t* dst_ar64,
- int dst_stride_ar64,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
int width,
int height) {
int y;
- void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
- int width) = ARGBToAR64Row_C;
- if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
+ if (src_stride_argb == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_ar64 = 0;
+ src_stride_argb = dst_stride_yj = 0;
}
-#if defined(HAS_ARGBTOAR64ROW_SSSE3)
+#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAR64Row = ARGBToAR64Row_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOAR64ROW_AVX2)
+#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR64Row = ARGBToAR64Row_AVX2;
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOAR64ROW_NEON)
+#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR64Row = ARGBToAR64Row_NEON;
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToAR64Row(src_argb, dst_ar64, width);
+ ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
- dst_ar64 += dst_stride_ar64;
+ dst_yj += dst_stride_yj;
}
return 0;
}
-// Convert ARGB to AB64.
+// Convert RGBA to J400.
LIBYUV_API
-int ARGBToAB64(const uint8_t* src_argb,
- int src_stride_argb,
- uint16_t* dst_ab64,
- int dst_stride_ab64,
+int RGBAToJ400(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
int width,
int height) {
int y;
- void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
- int width) = ARGBToAB64Row_C;
- if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
+ void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) =
+ RGBAToYJRow_C;
+ if (!src_rgba || !dst_yj || width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
if (height < 0) {
height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
+ src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+ src_stride_rgba = -src_stride_rgba;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
+ if (src_stride_rgba == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_ab64 = 0;
+ src_stride_rgba = dst_stride_yj = 0;
}
-#if defined(HAS_ARGBTOAB64ROW_SSSE3)
+#if defined(HAS_RGBATOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAB64Row = ARGBToAB64Row_SSSE3;
+ RGBAToYJRow = RGBAToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOAB64ROW_AVX2)
+#if defined(HAS_RGBATOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAB64Row = ARGBToAB64Row_AVX2;
+ RGBAToYJRow = RGBAToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYJRow = RGBAToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOAB64ROW_NEON)
+#if defined(HAS_RGBATOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAB64Row = ARGBToAB64Row_NEON;
+ RGBAToYJRow = RGBAToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGBAToYJRow = RGBAToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGBAToYJRow = RGBAToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_LSX;
}
}
#endif
+#if defined(HAS_RGBATOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGBAToYJRow = RGBAToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYJRow = RGBAToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToYJRow = RGBAToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToAB64Row(src_argb, dst_ab64, width);
- src_argb += src_stride_argb;
- dst_ab64 += dst_stride_ab64;
+ RGBAToYJRow(src_rgba, dst_yj, width);
+ src_rgba += src_stride_rgba;
+ dst_yj += dst_stride_yj;
}
return 0;
}
-// Convert ARGB to J400.
+// Convert ABGR to J420 (JPEG full-range I420).
LIBYUV_API
-int ARGBToJ400(const uint8_t* src_argb,
- int src_stride_argb,
+int ABGRToJ420(const uint8_t* src_abgr,
+ int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
- void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
- ARGBToYJRow_C;
- if (!src_argb || !dst_yj || width <= 0 || height == 0) {
+ void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
+ ABGRToUVJRow_C;
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
+ // Negative height means invert the image.
if (height < 0) {
height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
+ }
+#if defined(HAS_ABGRTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width);
+ src_abgr += src_stride_abgr * 2;
+ dst_yj += dst_stride_yj * 2;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
+ }
+ if (height & 1) {
+ ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ }
+ return 0;
+}
+
+// Convert ABGR to J422 (JPEG full-range I422).
+LIBYUV_API
+int ABGRToJ422(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height) {
+ int y;
+ void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
+ ABGRToUVJRow_C;
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_yj == width) {
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width &&
+ dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_yj = 0;
+ src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
-#if defined(HAS_ARGBTOYJROW_SSSE3)
+#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
+#if defined(HAS_ABGRTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
+ ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
+#if defined(HAS_ABGRTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
+ ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_MSA)
+#if defined(HAS_ABGRTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_MSA;
+ ABGRToYJRow = ABGRToYJRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToYJRow(src_argb, dst_yj, width);
- src_argb += src_stride_argb;
+ ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
return 0;
}
-// Convert RGBA to J400.
+// Convert ABGR to J400.
LIBYUV_API
-int RGBAToJ400(const uint8_t* src_rgba,
- int src_stride_rgba,
+int ABGRToJ400(const uint8_t* src_abgr,
+ int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height) {
int y;
- void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) =
- RGBAToYJRow_C;
- if (!src_rgba || !dst_yj || width <= 0 || height == 0) {
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
- src_rgba = src_rgba + (height - 1) * src_stride_rgba;
- src_stride_rgba = -src_stride_rgba;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
- if (src_stride_rgba == width * 4 && dst_stride_yj == width) {
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_rgba = dst_stride_yj = 0;
+ src_stride_abgr = dst_stride_yj = 0;
}
-#if defined(HAS_RGBATOYJROW_SSSE3)
+#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- RGBAToYJRow = RGBAToYJRow_Any_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_AVX2)
+#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- RGBAToYJRow = RGBAToYJRow_Any_AVX2;
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- RGBAToYJRow = RGBAToYJRow_AVX2;
+ ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_NEON)
+#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- RGBAToYJRow = RGBAToYJRow_Any_NEON;
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_NEON;
+ ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_MSA)
+#if defined(HAS_ABGRTOYJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- RGBAToYJRow = RGBAToYJRow_Any_MSA;
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_MSA;
+ ABGRToYJRow = ABGRToYJRow_MSA;
}
}
#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- RGBAToYJRow(src_rgba, dst_yj, width);
- src_rgba += src_stride_rgba;
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
}
return 0;
}
+// Convert ARGB to AR64.
+LIBYUV_API
+int ARGBToAR64(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint16_t* dst_ar64,
+ int dst_stride_ar64,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
+ int width) = ARGBToAR64Row_C;
+ if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_ar64 = 0;
+ }
+#if defined(HAS_ARGBTOAR64ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToAR64Row = ARGBToAR64Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAR64Row = ARGBToAR64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAR64Row = ARGBToAR64Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToAR64Row = ARGBToAR64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToAR64Row(src_argb, dst_ar64, width);
+ src_argb += src_stride_argb;
+ dst_ar64 += dst_stride_ar64;
+ }
+ return 0;
+}
+
+// Convert ARGB to AB64.
+LIBYUV_API
+int ARGBToAB64(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint16_t* dst_ab64,
+ int dst_stride_ab64,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
+ int width) = ARGBToAB64Row_C;
+ if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_ab64 = 0;
+ }
+#if defined(HAS_ARGBTOAB64ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToAB64Row = ARGBToAB64Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAB64Row = ARGBToAB64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAB64Row = ARGBToAB64Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToAB64Row = ARGBToAB64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToAB64Row(src_argb, dst_ab64, width);
+ src_argb += src_stride_argb;
+ dst_ab64 += dst_stride_ab64;
+ }
+ return 0;
+}
+
// Enabled if a 1-pass conversion is available
-#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA)
+#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
+ defined(HAS_RAWTOYJROW_RVV)
#define HAS_RAWTOYJROW
#endif
@@ -2355,7 +3116,7 @@ int RAWToJNV21(const uint8_t* src_raw,
int halfwidth = (width + 1) >> 1;
#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
@@ -2363,12 +3124,12 @@ int RAWToJNV21(const uint8_t* src_raw,
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
- void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
@@ -2403,6 +3164,27 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@@ -2459,11 +3241,19 @@ int RAWToJNV21(const uint8_t* src_raw,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -2488,29 +3278,41 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
+#if defined(HAS_RAWTOYJROW)
// Allocate a row of uv.
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
-#if !defined(HAS_RAWTOYJROW)
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_uv_size = ((halfwidth + 31) & ~31);
+ align_buffer_64(row_uj, row_uv_size * 2);
+ uint8_t* row_vj = row_uj + row_uv_size;
+#else
+ // Allocate rows of U and V plus 2 rows of ARGB.
+ const int row_size = ((width * 4 + 31) & ~31);
+ const int row_uv_size = ((halfwidth + 31) & ~31);
+ align_buffer_64(row_uj, row_uv_size * 2 + row_size * 2);
+ uint8_t* row_vj = row_uj + row_uv_size;
+ uint8_t* row = row_vj + row_uv_size;
#endif
+ if (!row_uj)
+ return 1;
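// Layout of the consolidated buffer above (illustrative arithmetic, not
// from this patch). One allocation now backs the half-width U/V rows and,
// in the 2-pass build, the two intermediate ARGB rows:
//   [row_uj: row_uv_size][row_vj: row_uv_size][row: 2 * row_size]
// e.g. width 640: halfwidth 320, row_uv_size 320, row_size 2560, so the
// 2-pass path makes a single align_buffer_64 call of 640 + 5120 bytes.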
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYJROW)
- RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVJRow(row, row_size, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2518,20 +3320,17 @@ int RAWToJNV21(const uint8_t* src_raw,
}
if (height & 1) {
#if defined(HAS_RAWTOYJROW)
- RAWToUVJRow(src_raw, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToUVJRow(src_raw, 0, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- ARGBToUVJRow(row, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ ARGBToUVJRow(row, 0, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
#endif
}
-#if !defined(HAS_RAWTOYJROW)
- free_aligned_buffer_64(row);
-#endif
- free_aligned_buffer_64(row_u);
+ free_aligned_buffer_64(row_uj);
}
return 0;
}
diff --git a/files/source/convert_jpeg.cc b/source/convert_jpeg.cc
index d7556ee9..d7556ee9 100644
--- a/files/source/convert_jpeg.cc
+++ b/source/convert_jpeg.cc
diff --git a/files/source/convert_to_argb.cc b/source/convert_to_argb.cc
index 84df16c8..84df16c8 100644
--- a/files/source/convert_to_argb.cc
+++ b/source/convert_to_argb.cc
diff --git a/files/source/convert_to_i420.cc b/source/convert_to_i420.cc
index 5869ecd7..5869ecd7 100644
--- a/files/source/convert_to_i420.cc
+++ b/source/convert_to_i420.cc
diff --git a/files/source/cpu_id.cc b/source/cpu_id.cc
index 56fe60e4..eedce16b 100644
--- a/files/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -40,7 +40,6 @@ extern "C" {
// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;
-// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
@@ -108,14 +107,14 @@ void CpuId(int eax, int ecx, int* cpu_info) {
// }
// For VS2013 and earlier 32-bit builds, the optimizer produces bad code for _xgetbv(0).
// https://code.google.com/p/libyuv/issues/detail?id=529
-#if defined(_M_IX86) && (_MSC_VER < 1900)
+#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect whether the OS saves the high parts of YMM registers.
-int GetXCR0() {
+static int GetXCR0() {
int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
@@ -129,7 +128,7 @@ int GetXCR0() {
#define GetXCR0() 0
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
-#if defined(_M_IX86) && (_MSC_VER < 1900)
+#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
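// Background for the checks below (a note, not from this patch): xgetbv(0)
// returns XCR0, whose bits report which register state the OS saves on
// context switch: bit 1 = XMM, bit 2 = YMM, bits 5-7 = AVX-512 opmask/ZMM
// state. Hence (GetXCR0() & 6) == 6 gates AVX/AVX2, and
// (GetXCR0() & 0xe0) == 0xe0 gates the AVX-512 features.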
@@ -137,13 +136,14 @@ int GetXCR0() {
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];
- FILE* f = fopen(cpuinfo_name, "r");
+ FILE* f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume Neon if /proc/cpuinfo is unavailable.
// This occurs in the Chrome sandbox for Pepper or Render processes.
return kCpuHasNEON;
}
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
char* p = strstr(cpuinfo_line, " neon");
if (p && (p[5] == ' ' || p[5] == '\n')) {
@@ -162,17 +162,90 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
return 0;
}
-// TODO(fbarchard): Consider read_msa_ir().
+LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
+ char cpuinfo_line[512];
+ int flag = 0;
+ FILE* f = fopen(cpuinfo_name, "re");
+ if (!f) {
+#if defined(__riscv_vector)
+ // Assume RVV if /proc/cpuinfo is unavailable.
+ // This occurs in the Chrome sandbox for Pepper or Render processes.
+ return kCpuHasRVV;
+#else
+ return 0;
+#endif
+ }
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
+ if (memcmp(cpuinfo_line, "isa", 3) == 0) {
+ // ISA string must begin with rv64{i,e,g} for a 64-bit processor.
+ char* isa = strstr(cpuinfo_line, "rv64");
+ if (isa) {
+ size_t isa_len = strlen(isa);
+ char* extensions;
+ size_t extensions_len = 0;
+ size_t std_isa_len;
+ // Remove the newline character at the end of the string.
+ if (isa[isa_len - 1] == '\n') {
+ isa[--isa_len] = '\0';
+ }
+ // Need at least 5 characters: "rv64" plus the base ISA letter.
+ if (isa_len < 5) {
+ fclose(f);
+ return 0;
+ }
+ // Skip canonical checking of the {i,e,g} base ISA letter and
+ // advance past the 5-character "rv64x" prefix.
+ isa += 5;
+ // Find the very first occurrence of 's', 'x' or 'z'.
+ // To detect multi-letter standard, non-standard, and
+ // supervisor-level extensions.
+ extensions = strpbrk(isa, "zxs");
+ if (extensions) {
+ // Multi-letter extensions are separated by a single underscore
+ // as described in RISC-V User-Level ISA V2.2.
+ char* ext = strtok(extensions, "_");
+ extensions_len = strlen(extensions);
+ while (ext) {
+ // Search for the ZVFH (Vector FP16) extension.
+ if (!strcmp(ext, "zvfh")) {
+ flag |= kCpuHasRVVZVFH;
+ }
+ ext = strtok(NULL, "_");
+ }
+ }
+ std_isa_len = isa_len - extensions_len - 5;
+ // Detect the v in the standard single-letter extensions.
+ if (memchr(isa, 'v', std_isa_len)) {
+ // The V extension implies the F extension.
+ flag |= kCpuHasRVV;
+ }
+ }
+ }
+#if defined(__riscv_vector)
+ // Assume RVV if /proc/cpuinfo is from an x86 host running QEMU.
+ else if ((memcmp(cpuinfo_line, "vendor_id\t: GenuineIntel", 24) == 0) ||
+ (memcmp(cpuinfo_line, "vendor_id\t: AuthenticAMD", 24) == 0)) {
+ fclose(f);
+ return kCpuHasRVV;
+ }
+#endif
+ }
+ fclose(f);
+ return flag;
+}
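// A worked example for the parser above (illustrative, not from this
// patch). For a cpuinfo line "isa : rv64imafdcv_zicsr_zvfh":
//   - the trailing newline is stripped and the 5-byte "rv64i" prefix skipped;
//   - strpbrk finds the multi-letter region "zicsr_zvfh", which strtok
//     splits on '_' into "zicsr" and "zvfh" (the latter sets kCpuHasRVVZVFH);
//   - memchr then finds 'v' among the single-letter extensions "mafdcv",
//     setting kCpuHasRVV.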
+
LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];
- int flag = 0x0;
- FILE* f = fopen(cpuinfo_name, "r");
+ int flag = 0;
+ FILE* f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume nothing if /proc/cpuinfo is unavailable.
// This occurs in the Chrome sandbox for Pepper or Render processes.
return 0;
}
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
// Workaround early kernel without MSA in ASEs line.
if (strstr(cpuinfo_line, "Loongson-2K")) {
@@ -191,14 +264,13 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
return flag;
}
-// TODO(fbarchard): Consider read_loongarch_ir().
#define LOONGARCH_CFG2 0x2
#define LOONGARCH_CFG2_LSX (1 << 6)
#define LOONGARCH_CFG2_LASX (1 << 7)
#if defined(__loongarch__)
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
- int flag = 0x0;
+ int flag = 0;
uint32_t cfg2 = 0;
__asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));
@@ -220,10 +292,12 @@ static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info0[4] = {0, 0, 0, 0};
int cpu_info1[4] = {0, 0, 0, 0};
int cpu_info7[4] = {0, 0, 0, 0};
+ int cpu_einfo7[4] = {0, 0, 0, 0};
CpuId(0, 0, cpu_info0);
CpuId(1, 0, cpu_info1);
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
+ CpuId(7, 1, cpu_einfo7);
}
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
@@ -236,7 +310,9 @@ static SAFEBUFFERS int GetCpuFlags(void) {
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
- ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
+ ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0) |
+ ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
+ ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
@@ -246,8 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0;
cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
- cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
- cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
+ cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0;
}
}
#endif
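// A note on the new bit tests (not from this patch): cpu_einfo7 holds CPUID
// leaf 7, subleaf 1 (EAX=7, ECX=1), whose documented feature bits are:
//   AVX-VNNI       EAX bit 4   (0x00000010 in cpu_einfo7[0])
//   AVX-VNNI-INT8  EDX bit 4   (0x00000010 in cpu_einfo7[3])
//   AVX10          EDX bit 19  (0x00080000 in cpu_einfo7[3])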
@@ -277,6 +352,10 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#endif
cpu_info |= kCpuHasARM;
#endif // __arm__
+#if defined(__riscv) && defined(__linux__)
+ cpu_info = RiscvCpuCaps("/proc/cpuinfo");
+ cpu_info |= kCpuHasRISCV;
+#endif // __riscv
cpu_info |= kCpuInitialized;
return cpu_info;
}
diff --git a/files/source/mjpeg_decoder.cc b/source/mjpeg_decoder.cc
index 4ccf00a3..0141da8a 100644
--- a/files/source/mjpeg_decoder.cc
+++ b/source/mjpeg_decoder.cc
@@ -109,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
}
buf_.data = src;
- buf_.len = static_cast<int>(src_len);
+ buf_.len = (int)src_len;
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@@ -428,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
- size_t bytes = static_cast<size_t>(num_bytes);
+ size_t bytes = (size_t)num_bytes;
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
diff --git a/files/source/mjpeg_validate.cc b/source/mjpeg_validate.cc
index ba0a03ab..ba0a03ab 100644
--- a/files/source/mjpeg_validate.cc
+++ b/source/mjpeg_validate.cc
diff --git a/files/source/planar_functions.cc b/source/planar_functions.cc
index 169d4a8f..1c94e260 100644
--- a/files/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -75,6 +75,11 @@ void CopyPlane(const uint8_t* src_y,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Copy plane
for (y = 0; y < height; ++y) {
@@ -162,7 +167,7 @@ void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
- int scale, // 16384 for 10 bits
+ int scale, // 1024 for 10 bits
int width,
int height) {
int y;
@@ -333,6 +338,45 @@ int I210Copy(const uint16_t* src_y,
return 0;
}
+// Copy I410.
+LIBYUV_API
+int I410Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+}
+
// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@@ -385,6 +429,7 @@ int I420ToI400(const uint8_t* src_y,
}
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -418,6 +463,7 @@ int NV12Copy(const uint8_t* src_y,
}
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@@ -504,6 +550,11 @@ void SplitUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of UV.
@@ -553,11 +604,19 @@ void MergeUVPlane(const uint8_t* src_u,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
+ if (IS_ALIGNED(width, 16)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(width, 32)) {
+ MergeUVRow = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
@@ -582,6 +641,11 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow = MergeUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of UV.
@@ -687,7 +751,7 @@ void MergeUVPlane_16(const uint16_t* src_u,
#if defined(HAS_MERGEUVROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
+ if (IS_ALIGNED(width, 8)) {
MergeUVRow_16 = MergeUVRow_16_AVX2;
}
}
@@ -911,31 +975,31 @@ int NV21ToNV12(const uint8_t* src_y,
return 0;
}
+// Test if tile_height is a power of 2 (16 or 32)
+#define IS_POWEROFTWO(x) (!((x) & ((x)-1)))
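// A one-line worked example (not from this patch): a power of two has a
// single set bit, so x & (x - 1) clears it, e.g. 16 & 15 = 0b10000 & 0b01111
// = 0, while 24 & 23 = 0b11000 & 0b10111 = 0b10000 != 0. (0 also passes the
// test; the documented tile heights are 16 and 32.)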
+
// Detile a plane of data
// Tile width is assumed to be 16.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of the source, ignoring tiling, e.g. 640.
// TODO: More detile row functions.
-
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height) {
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
const ptrdiff_t src_tile_stride = 16 * tile_height;
int y;
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
int width) = DetileRow_C;
- assert(src_stride_y >= 0);
- assert(tile_height > 0);
- assert(src_stride_y > 0);
-
- if (width <= 0 || height == 0) {
- return;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
}
+
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -970,6 +1034,72 @@ void DetilePlane(const uint8_t* src_y,
src_y = src_y - src_tile_stride + src_stride_y * tile_height;
}
}
+ return 0;
+}
+
+// Convert a plane of 16-bit pixels in 16 x H tiles to linear layout.
+// tile width is 16 and assumed.
+// tile_height is 16 or 32 for MT2T.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
+ const ptrdiff_t src_tile_stride = 16 * tile_height;
+ int y;
+ void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
+ uint16_t* dst, int width) = DetileRow_16_C;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+#if defined(HAS_DETILEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ DetileRow_16 = DetileRow_16_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ DetileRow_16 = DetileRow_16_Any_AVX;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_AVX;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DetileRow_16 = DetileRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_NEON;
+ }
+ }
+#endif
+
+ // Detile plane
+ for (y = 0; y < height; ++y) {
+ DetileRow_16(src_y, src_tile_stride, dst_y, width);
+ dst_y += dst_stride_y;
+ src_y += 16;
+ // Advance to next row of tiles.
+ if ((y & (tile_height - 1)) == (tile_height - 1)) {
+ src_y = src_y - src_tile_stride + src_stride_y * tile_height;
+ }
+ }
+ return 0;
}
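// A worked example of the tile walk above (illustrative, not from this
// patch), for width 64, tile_height 16, src_stride_y 64 elements:
// src_tile_stride = 16 * 16 = 256. DetileRow_16 gathers each output row
// from 4 tiles spaced 256 elements apart; src_y += 16 steps one row down
// inside the first tile, and after 16 rows the pointer rewinds one tile
// (-256) and jumps a full band forward (+ 64 * 16) to the next row of tiles.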
LIBYUV_API
@@ -1033,6 +1163,74 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
}
}
+LIBYUV_API
+void DetileToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height,
+ int tile_height) {
+ const ptrdiff_t src_y_tile_stride = 16 * tile_height;
+ const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
+ int y;
+ void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
+ assert(src_stride_y >= 0);
+ assert(src_stride_y > 0);
+ assert(src_stride_uv >= 0);
+ assert(src_stride_uv > 0);
+ assert(tile_height > 0);
+
+ if (width <= 0 || height == 0 || tile_height <= 0) {
+ return;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+ dst_stride_yuy2 = -dst_stride_yuy2;
+ }
+
+#if defined(HAS_DETILETOYUY2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DetileToYUY2 = DetileToYUY2_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DetileToYUY2 = DetileToYUY2_NEON;
+ }
+ }
+#endif
+
+#if defined(HAS_DETILETOYUY2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ DetileToYUY2 = DetileToYUY2_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ DetileToYUY2 = DetileToYUY2_SSE2;
+ }
+ }
+#endif
+
+ // Detile plane
+ for (y = 0; y < height; ++y) {
+ DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2,
+ width);
+ dst_yuy2 += dst_stride_yuy2;
+ src_y += 16;
+
+ if (y & 0x1)
+ src_uv += 16;
+
+ // Advance to next row of tiles.
+ if ((y & (tile_height - 1)) == (tile_height - 1)) {
+ src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
+ src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
+ }
+ }
+}
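// A note on the UV stepping above (not from this patch): YUY2 packs two
// pixels into four bytes as Y0,U,Y1,V, and the NV12-style UV plane is half
// height, so each UV row feeds two output rows. That is why src_uv advances
// 16 bytes only on odd y, and why src_uv_tile_stride is half of
// src_y_tile_stride when rewinding at each tile band.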
+
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
@@ -1085,6 +1283,11 @@ void SplitRGBPlane(const uint8_t* src_rgb,
}
}
#endif
+#if defined(HAS_SPLITRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitRGBRow = SplitRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of RGB.
@@ -1144,6 +1347,11 @@ void MergeRGBPlane(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGERGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeRGBRow = MergeRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of RGB.
@@ -1156,18 +1364,18 @@ void MergeRGBPlane(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneAlpha(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- uint8_t* dst_a,
- int dst_stride_a,
- int width,
- int height) {
+static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) =
@@ -1175,6 +1383,9 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
width *= height;
@@ -1215,6 +1426,11 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitARGBRow = SplitARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
@@ -1227,21 +1443,24 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneOpaque(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- int width,
- int height) {
+static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
int y;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C;
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width) {
width *= height;
@@ -1281,6 +1500,11 @@ void SplitARGBPlaneOpaque(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitXRGBRow = SplitXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
@@ -1328,18 +1552,18 @@ void SplitARGBPlane(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneAlpha(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- const uint8_t* src_a,
- int src_stride_a,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneAlpha(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a,
@@ -1347,6 +1571,9 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_argb == width * 4) {
width *= height;
@@ -1378,6 +1605,11 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeARGBRow = MergeARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
@@ -1390,16 +1622,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneOpaque(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneOpaque(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) =
@@ -1407,6 +1639,9 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_argb == width * 4) {
width *= height;
@@ -1437,6 +1672,11 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeXRGBRow = MergeXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
@@ -1888,6 +2128,16 @@ int YUY2ToI422(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -1984,6 +2234,16 @@ int UYVYToI422(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
UYVYToYRow = UYVYToYRow_Any_LASX;
@@ -2131,6 +2391,14 @@ int UYVYToY(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
UYVYToYRow(src_uyvy, dst_y, width);
@@ -2189,6 +2457,14 @@ void MirrorPlane(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -2255,6 +2531,14 @@ void MirrorUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_MIRRORUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorUVRow = MirrorUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ MirrorUVRow = MirrorUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorUVRow = MirrorUVRow_Any_LASX;
@@ -2427,6 +2711,14 @@ int ARGBMirror(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -2491,37 +2783,6 @@ int RGB24Mirror(const uint8_t* src_rgb24,
return 0;
}
-// Get a blender that optimized for the CPU and pixel count.
-// As there are 6 blenders to choose from, the caller should try to use
-// the same blend function for all pixels if possible.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend() {
- void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
- uint8_t* dst_argb, int width) = ARGBBlendRow_C;
-#if defined(HAS_ARGBBLENDROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBBlendRow = ARGBBlendRow_SSSE3;
- return ARGBBlendRow;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBBlendRow = ARGBBlendRow_NEON;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- ARGBBlendRow = ARGBBlendRow_MSA;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- ARGBBlendRow = ARGBBlendRow_LSX;
- }
-#endif
- return ARGBBlendRow;
-}
-
// Alpha Blend 2 ARGB images and store to destination.
LIBYUV_API
int ARGBBlend(const uint8_t* src_argb0,
@@ -2534,7 +2795,7 @@ int ARGBBlend(const uint8_t* src_argb0,
int height) {
int y;
void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
- uint8_t* dst_argb, int width) = GetARGBBlend();
+ uint8_t* dst_argb, int width) = ARGBBlendRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -2551,7 +2812,31 @@ int ARGBBlend(const uint8_t* src_argb0,
height = 1;
src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
}
-
+#if defined(HAS_ARGBBLENDROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBBlendRow = ARGBBlendRow_SSSE3;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBBlendRow = ARGBBlendRow_NEON;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBBlendRow = ARGBBlendRow_MSA;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBBlendRow = ARGBBlendRow_LSX;
+ }
+#endif
+#if defined(HAS_ARGBBLENDROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBBlendRow = ARGBBlendRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
src_argb0 += src_stride_argb0;
@@ -2611,6 +2896,11 @@ int BlendPlane(const uint8_t* src_y0,
}
}
#endif
+#if defined(HAS_BLENDPLANEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ BlendPlaneRow = BlendPlaneRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
@@ -2688,6 +2978,11 @@ int I420Blend(const uint8_t* src_y0,
}
}
#endif
+#if defined(HAS_BLENDPLANEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ BlendPlaneRow = BlendPlaneRow_RVV;
+ }
+#endif
if (!IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
}
@@ -2724,9 +3019,16 @@ int I420Blend(const uint8_t* src_y0,
}
}
#endif
+#if defined(HAS_SCALEROWDOWN2_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowDown2 = ScaleRowDown2Box_RVV;
+ }
+#endif
// Row buffer for intermediate alpha pixels.
align_buffer_64(halfalpha, halfwidth);
+ if (!halfalpha)
+ return 1;
for (y = 0; y < height; y += 2) {
// last row of odd height image use 1 row of alpha instead of 2.
if (y == (height - 1)) {
@@ -2809,6 +3111,14 @@ int ARGBMultiply(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBMULTIPLYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMULTIPLYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
@@ -2894,6 +3204,14 @@ int ARGBAdd(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBADDROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAddRow = ARGBAddRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAddRow = ARGBAddRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBADDROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAddRow = ARGBAddRow_Any_LASX;
@@ -2974,6 +3292,14 @@ int ARGBSubtract(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBSUBTRACTROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSubtractRow = ARGBSubtractRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSUBTRACTROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
@@ -3051,6 +3377,11 @@ int RAWToRGB24(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToRGB24Row = RAWToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGB24Row(src_raw, dst_rgb24, width);
@@ -3060,6 +3391,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
+// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@@ -3067,7 +3399,7 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value) {
int y;
- void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
+ void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
if (width <= 0 || height == 0) {
return;
@@ -3120,7 +3452,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
- SetRow(dst_y, value, width);
+ SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}
@@ -3168,7 +3500,7 @@ int ARGBRect(uint8_t* dst_argb,
int height,
uint32_t value) {
int y;
- void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+ void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3293,6 +3625,14 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
@@ -3301,6 +3641,11 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBAttenuateRow(src_argb, dst_argb, width);
@@ -3401,6 +3746,11 @@ int ARGBGrayTo(const uint8_t* src_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3451,6 +3801,11 @@ int ARGBGray(uint8_t* dst_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3473,7 +3828,7 @@ int ARGBSepia(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+ void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3499,6 +3854,11 @@ int ARGBSepia(uint8_t* dst_argb,
ARGBSepiaRow = ARGBSepiaRow_MSA;
}
#endif
+#if defined(HAS_ARGBSEPIAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSEPIAROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBSepiaRow = ARGBSepiaRow_LASX;
@@ -3616,7 +3976,7 @@ int ARGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3652,7 +4012,7 @@ int RGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3697,7 +4057,7 @@ int ARGBQuantize(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
+ void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
@@ -3924,6 +4284,11 @@ int ARGBShade(const uint8_t* src_argb,
ARGBShadeRow = ARGBShadeRow_MSA;
}
#endif
+#if defined(HAS_ARGBSHADEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) {
+ ARGBShadeRow = ARGBShadeRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSHADEROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
ARGBShadeRow = ARGBShadeRow_LASX;
@@ -3950,7 +4315,7 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4008,6 +4373,11 @@ int InterpolatePlane(const uint8_t* src0,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
InterpolateRow(dst, src0, src1 - src0, width, interpolation);
@@ -4030,7 +4400,7 @@ int InterpolatePlane_16(const uint16_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4213,6 +4583,14 @@ int ARGBShuffle(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_ARGBSHUFFLEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBShuffleRow = ARGBShuffleRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSHUFFLEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
@@ -4334,6 +4712,8 @@ int GaussPlane_F32(const float* src,
{
// 2 pixels on each side, but aligned out to 16 bytes.
align_buffer_64(rowbuf, (4 + width + 4) * 4);
+ if (!rowbuf)
+ return 1;
memset(rowbuf, 0, 16);
memset(rowbuf + (4 + width) * 4, 0, 16);
float* row = (float*)(rowbuf + 16);
@@ -4444,6 +4824,11 @@ static int ARGBSobelize(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
@@ -4477,16 +4862,18 @@ static int ARGBSobelize(const uint8_t* src_argb,
#endif
{
// 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 31) & ~31;
- align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
+ const int row_size = (width + kEdge + 31) & ~31;
+ align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
uint8_t* row_sobelx = rows;
- uint8_t* row_sobely = rows + kRowSize;
- uint8_t* row_y = rows + kRowSize * 2;
+ uint8_t* row_sobely = rows + row_size;
+ uint8_t* row_y = rows + row_size * 2;
// Convert first row.
uint8_t* row_y0 = row_y + kEdge;
- uint8_t* row_y1 = row_y0 + kRowSize;
- uint8_t* row_y2 = row_y1 + kRowSize;
+ uint8_t* row_y1 = row_y0 + row_size;
+ uint8_t* row_y2 = row_y1 + row_size;
+ if (!rows)
+ return 1;
ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
@@ -4967,6 +5354,11 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
: ARGBExtractAlphaRow_Any_LSX;
}
#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
+ }
+#endif
for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width);
@@ -5018,6 +5410,11 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBCopyYToAlphaRow(src_y, dst_argb, width);
@@ -5027,9 +5424,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
return 0;
}
-// TODO(fbarchard): Consider if width is even Y channel can be split
-// directly. A SplitUVRow_Odd function could copy the remaining chroma.
-
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
@@ -5040,13 +5434,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
int width,
int height) {
int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
- int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
-
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
+ uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
@@ -5057,109 +5448,91 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
-#if defined(HAS_SPLITUVROW_SSE2)
+#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_AVX2)
+#if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_NEON)
+#if defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
+ YUY2ToYRow = YUY2ToYRow_NEON;
}
}
#endif
-#if defined(HAS_SPLITUVROW_MSA)
+#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- SplitUVRow = SplitUVRow_Any_MSA;
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_MSA;
+ YUY2ToYRow = YUY2ToYRow_MSA;
}
}
#endif
-#if defined(HAS_SPLITUVROW_LSX)
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
- SplitUVRow = SplitUVRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_LSX;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_LSX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
+#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_LASX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
+
+#if defined(HAS_YUY2TONVUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- InterpolateRow = InterpolateRow_Any_MSA;
+#if defined(HAS_YUY2TONVUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_MSA;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- InterpolateRow = InterpolateRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_LSX;
+#if defined(HAS_YUY2TONVUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
}
}
#endif
- {
- int awidth = halfwidth * 2;
- // row of y and 2 rows of uv
- align_buffer_64(rows, awidth * 3);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
- memcpy(dst_y, rows, width);
- SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
- memcpy(dst_y + dst_stride_y, rows, width);
- InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, dst_uv, awidth);
- memcpy(dst_y, rows, width);
- }
- free_aligned_buffer_64(rows);
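+ // Convert two rows per pass: Y is copied per row, while YUY2ToNVUVRow
+ // averages the chroma of each row pair into a single NV12 UV row.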
+ for (y = 0; y < height - 1; y += 2) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+ YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
}
return 0;
}
@@ -5177,7 +5550,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
@@ -5231,6 +5604,12 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
+
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -5271,11 +5650,18 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
{
int awidth = halfwidth * 2;
// row of y and 2 rows of uv
align_buffer_64(rows, awidth * 3);
+ if (!rows)
+ return 1;
for (y = 0; y < height - 1; y += 2) {
// Split Y from UV.
@@ -5336,6 +5722,7 @@ void HalfMergeUVPlane(const uint8_t* src_u,
HalfMergeUVRow = HalfMergeUVRow_AVX2;
}
#endif
+
for (y = 0; y < height - 1; y += 2) {
// Merge a row of U and V into a row of UV.
HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
diff --git a/files/source/rotate.cc b/source/rotate.cc
index f1e83cbd..3f8332c3 100644
--- a/files/source/rotate.cc
+++ b/source/rotate.cc
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
+
#include "libyuv/rotate.h"
#include "libyuv/convert.h"
@@ -138,8 +140,11 @@ void RotatePlane180(const uint8_t* src,
int dst_stride,
int width,
int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
+ // Swap top and bottom row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
+ assert(row);
+ if (!row)
+ return;
const uint8_t* src_bot = src + src_stride * (height - 1);
uint8_t* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
@@ -178,6 +183,14 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -206,12 +219,17 @@ void RotatePlane180(const uint8_t* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
- CopyRow(src, row, width); // Copy first row into buffer
- MirrorRow(src_bot, dst, width); // Mirror last row into first row
- MirrorRow(row, dst_bot, width); // Mirror buffer into last row
+ CopyRow(src, row, width); // Copy top row into buffer
+ MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
+ MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
@@ -476,6 +494,124 @@ int RotatePlane(const uint8_t* src,
return -1;
}
+static void TransposePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i = height;
+ // Work across the source in 8x8 tiles
+ while (i >= 8) {
+ TransposeWx8_16_C(src, src_stride, dst, dst_stride, width);
+ src += 8 * src_stride; // Go down 8 rows.
+ dst += 8; // Move over 8 columns.
+ i -= 8;
+ }
+
+ if (i > 0) {
+ TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, i);
+ }
+}
+
+static void RotatePlane90_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 90 is a transpose with the source read
+ // from bottom to top. So set the source pointer to the end
+ // of the buffer and flip the sign of the source stride.
+ src += src_stride * (height - 1);
+ src_stride = -src_stride;
+ TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
+}
+
+static void RotatePlane270_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 270 is a transpose with the destination written
+ // from bottom to top. So set the destination pointer to the end
+ // of the buffer and flip the sign of the destination stride.
+ dst += dst_stride * (width - 1);
+ dst_stride = -dst_stride;
+ TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
+}
+
+static void RotatePlane180_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ const uint16_t* src_bot = src + src_stride * (height - 1);
+ uint16_t* dst_bot = dst + dst_stride * (height - 1);
+ int half_height = (height + 1) >> 1;
+ int y;
+
+ // Swap top and bottom row and mirror the content. Uses a temporary row.
+ align_buffer_64(row, width * 2);
+ uint16_t* row_tmp = (uint16_t*)row;
+ assert(row);
+ if (!row)
+ return;
+
+ // Odd height will harmlessly mirror the middle row twice.
+ for (y = 0; y < half_height; ++y) {
+ CopyRow_16_C(src, row_tmp, width); // Copy top row into buffer
+ MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row
+ MirrorRow_16_C(row_tmp, dst_bot, width); // Mirror buffer into bottom row
+ src += src_stride;
+ dst += dst_stride;
+ src_bot -= src_stride;
+ dst_bot -= dst_stride;
+ }
+ free_aligned_buffer_64(row);
+}
+
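+// A hedged usage sketch (buffer names are illustrative): rotate a 16-bit
+// plane 90 degrees clockwise; the destination is height x width, so with
+// packed rows dst_stride is the source height:
+//   RotatePlane_16(src, width, dst, height, width, height, kRotate90);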
+LIBYUV_API
+int RotatePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src || width <= 0 || height == 0 || !dst) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src = src + (height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // Copy frame
+ CopyPlane_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate90:
+ RotatePlane90_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
LIBYUV_API
int I420Rotate(const uint8_t* src_y,
int src_stride_y,
@@ -544,6 +680,8 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
+// I422 has half width x full height UV planes, so rotations by 90 and 270
+// require scaling to maintain 422 subsampling.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
@@ -562,6 +700,7 @@ int I422Rotate(const uint8_t* src_y,
enum RotationMode mode) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
+ int r;
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
@@ -579,31 +718,54 @@ int I422Rotate(const uint8_t* src_y,
switch (mode) {
case kRotate0:
- // copy frame
+ // Copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
return 0;
+
+ // Note on the temporary Y plane used for UV.
+ // The UV planes are rotated first, using the Y destination plane as
+ // scratch storage:
+ // Y plane is width x height
+ // Y plane rotated is height x width
+ // UV plane is (width / 2) x height
+ // UV plane rotated is height x (width / 2)
+ // UV plane rotated+scaled is (height / 2) x width.
+ // UV plane rotated is a temporary that fits within the Y plane rotated.
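+ // For example (hypothetical 640x480 I422): rotated Y occupies 480x640;
+ // each rotated U/V plane is 480x320, which fits, and is then scaled to
+ // 240x640 to restore 422 subsampling.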
+
case kRotate90:
- // We need to rotate and rescale, we use plane Y as temporal storage.
- RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
- halfheight, width, kFilterBilinear);
- RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
- halfheight, width, kFilterLinear);
+ RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+ dst_stride_u, halfheight, width, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+ dst_stride_v, halfheight, width, kFilterLinear);
+ if (r != 0) {
+ return r;
+ }
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate270:
- // We need to rotate and rescale, we use plane Y as temporal storage.
- RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
- halfheight, width, kFilterBilinear);
- RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
- halfheight, width, kFilterLinear);
+ RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u,
+ dst_stride_u, halfheight, width, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v,
+ dst_stride_v, halfheight, width, kFilterLinear);
+ if (r != 0) {
+ return r;
+ }
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
@@ -828,6 +990,241 @@ int Android420ToI420Rotate(const uint8_t* src_y,
return -1;
}
+LIBYUV_API
+int I010Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v || dst_stride_y < 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // Copy frame
+ return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height);
+ case kRotate90:
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+// I210 has half width x full height UV planes, so rotations by 90 and 270
+// require scaling to maintain 422 subsampling.
+LIBYUV_API
+int I210Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ int r;
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // Copy frame
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
+ return 0;
+
+ // Note on the temporary Y plane used for UV.
+ // The UV planes are rotated first, using the Y destination plane as
+ // scratch storage:
+ // Y plane is width x height
+ // Y plane rotated is height x width
+ // UV plane is (width / 2) x height
+ // UV plane rotated is height x (width / 2)
+ // UV plane rotated+scaled is (height / 2) x width.
+ // UV plane rotated is a temporary that fits within the Y plane rotated.
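+ // (Same scheme as I422Rotate above; see the worked example there.)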
+
+ case kRotate90:
+ RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+ dst_stride_u, halfheight, width, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+ dst_stride_v, halfheight, width, kFilterLinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u,
+ dst_stride_u, halfheight, width, kFilterBilinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ r = ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v,
+ dst_stride_v, halfheight, width, kFilterLinear);
+ if (r != 0) {
+ return r;
+ }
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ height);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+LIBYUV_API
+int I410Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v || dst_stride_y < 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // Copy frame
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+ case kRotate90:
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
+ height);
+ RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
+ height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
+ height);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
+ height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/rotate_any.cc b/source/rotate_any.cc
index 88ca7876..88ca7876 100644
--- a/files/source/rotate_any.cc
+++ b/source/rotate_any.cc
diff --git a/files/source/rotate_argb.cc b/source/rotate_argb.cc
index 539cf98d..d55fac4f 100644
--- a/files/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -8,11 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "libyuv/rotate.h"
+#include "libyuv/rotate_argb.h"
#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */
@@ -68,6 +69,11 @@ static int ARGBTranspose(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
+ }
+#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
@@ -114,7 +120,6 @@ static int ARGBRotate180(const uint8_t* src_argb,
int width,
int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
- align_buffer_64(row, width * 4);
const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
int half_height = (height + 1) >> 1;
@@ -123,6 +128,9 @@ static int ARGBRotate180(const uint8_t* src_argb,
ARGBMirrorRow_C;
void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
CopyRow_C;
+ align_buffer_64(row, width * 4);
+ if (!row)
+ return 1;
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
@@ -155,6 +163,14 @@ static int ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -183,6 +199,11 @@ static int ARGBRotate180(const uint8_t* src_argb,
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
diff --git a/source/rotate_common.cc b/source/rotate_common.cc
new file mode 100644
index 00000000..e72608e9
--- /dev/null
+++ b/source/rotate_common.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate_row.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
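+// Transpose an 8-row strip: column i of the 8 source rows becomes row i of
+// the destination.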
+void TransposeWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst[0] = src[0 * src_stride];
+ dst[1] = src[1 * src_stride];
+ dst[2] = src[2 * src_stride];
+ dst[3] = src[3 * src_stride];
+ dst[4] = src[4 * src_stride];
+ dst[5] = src[5 * src_stride];
+ dst[6] = src[6 * src_stride];
+ dst[7] = src[7 * src_stride];
+ ++src;
+ dst += dst_stride;
+ }
+}
+
+void TransposeUVWx8_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_a[0] = src[0 * src_stride + 0];
+ dst_b[0] = src[0 * src_stride + 1];
+ dst_a[1] = src[1 * src_stride + 0];
+ dst_b[1] = src[1 * src_stride + 1];
+ dst_a[2] = src[2 * src_stride + 0];
+ dst_b[2] = src[2 * src_stride + 1];
+ dst_a[3] = src[3 * src_stride + 0];
+ dst_b[3] = src[3 * src_stride + 1];
+ dst_a[4] = src[4 * src_stride + 0];
+ dst_b[4] = src[4 * src_stride + 1];
+ dst_a[5] = src[5 * src_stride + 0];
+ dst_b[5] = src[5 * src_stride + 1];
+ dst_a[6] = src[6 * src_stride + 0];
+ dst_b[6] = src[6 * src_stride + 1];
+ dst_a[7] = src[7 * src_stride + 0];
+ dst_b[7] = src[7 * src_stride + 1];
+ src += 2;
+ dst_a += dst_stride_a;
+ dst_b += dst_stride_b;
+ }
+}
+
+void TransposeWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst[i * dst_stride + j] = src[j * src_stride + i];
+ }
+ }
+}
+
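+// Transpose an interleaved UV plane: even bytes go to dst_a, odd bytes to
+// dst_b. width is in UV pairs.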
+void TransposeUVWxH_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width * 2; i += 2) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
+ dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
+ }
+ }
+}
+
+void TransposeWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst[0] = src[0 * src_stride];
+ dst[1] = src[1 * src_stride];
+ dst[2] = src[2 * src_stride];
+ dst[3] = src[3 * src_stride];
+ dst[4] = src[4 * src_stride];
+ dst[5] = src[5 * src_stride];
+ dst[6] = src[6 * src_stride];
+ dst[7] = src[7 * src_stride];
+ ++src;
+ dst += dst_stride;
+ }
+}
+
+void TransposeWxH_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst[i * dst_stride + j] = src[j * src_stride + i];
+ }
+ }
+}
+
+// Transpose 32 bit values (ARGB)
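+// Each iteration transposes one 4x4 block: the source pointer steps down 4
+// rows per block while the destination steps 4 pixels (16 bytes) across, so
+// width counts the source rows consumed.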
+void Transpose4x4_32_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ int i;
+ for (i = 0; i < width; i += 4) {
+ uint32_t p00 = ((uint32_t*)(src))[0];
+ uint32_t p10 = ((uint32_t*)(src))[1];
+ uint32_t p20 = ((uint32_t*)(src))[2];
+ uint32_t p30 = ((uint32_t*)(src))[3];
+ uint32_t p01 = ((uint32_t*)(src1))[0];
+ uint32_t p11 = ((uint32_t*)(src1))[1];
+ uint32_t p21 = ((uint32_t*)(src1))[2];
+ uint32_t p31 = ((uint32_t*)(src1))[3];
+ uint32_t p02 = ((uint32_t*)(src2))[0];
+ uint32_t p12 = ((uint32_t*)(src2))[1];
+ uint32_t p22 = ((uint32_t*)(src2))[2];
+ uint32_t p32 = ((uint32_t*)(src2))[3];
+ uint32_t p03 = ((uint32_t*)(src3))[0];
+ uint32_t p13 = ((uint32_t*)(src3))[1];
+ uint32_t p23 = ((uint32_t*)(src3))[2];
+ uint32_t p33 = ((uint32_t*)(src3))[3];
+ ((uint32_t*)(dst))[0] = p00;
+ ((uint32_t*)(dst))[1] = p01;
+ ((uint32_t*)(dst))[2] = p02;
+ ((uint32_t*)(dst))[3] = p03;
+ ((uint32_t*)(dst1))[0] = p10;
+ ((uint32_t*)(dst1))[1] = p11;
+ ((uint32_t*)(dst1))[2] = p12;
+ ((uint32_t*)(dst1))[3] = p13;
+ ((uint32_t*)(dst2))[0] = p20;
+ ((uint32_t*)(dst2))[1] = p21;
+ ((uint32_t*)(dst2))[2] = p22;
+ ((uint32_t*)(dst2))[3] = p23;
+ ((uint32_t*)(dst3))[0] = p30;
+ ((uint32_t*)(dst3))[1] = p31;
+ ((uint32_t*)(dst3))[2] = p32;
+ ((uint32_t*)(dst3))[3] = p33;
+ src += src_stride * 4; // advance 4 rows
+ src1 += src_stride * 4;
+ src2 += src_stride * 4;
+ src3 += src_stride * 4;
+ dst += 4 * 4; // advance 4 columns
+ dst1 += 4 * 4;
+ dst2 += 4 * 4;
+ dst3 += 4 * 4;
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
diff --git a/files/source/rotate_gcc.cc b/source/rotate_gcc.cc
index 1a3f8cbb..fd5eee05 100644
--- a/files/source/rotate_gcc.cc
+++ b/source/rotate_gcc.cc
@@ -365,6 +365,136 @@ void TransposeUVWx8_SSE2(const uint8_t* src,
"xmm7", "xmm8", "xmm9");
}
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
+
+#if defined(HAS_TRANSPOSE4X4_32_SSE2)
+// 4 values, little endian view
+// a b c d
+// e f g h
+// i j k l
+// m n o p
+
+// transpose 2x2
+// a e b f from row 0, 1
+// i m j n from row 2, 3
+// c g d h from row 0, 1
+// k o l p from row 2, 3
+
+// transpose 4x4
+// a e i m from row 0, 1
+// b f j n from row 0, 1
+// c g k o from row 2, 3
+// d h l p from row 2, 3
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Main loop transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // a b c d
+ "movdqu (%0,%3),%%xmm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "movdqu (%0),%%xmm2 \n" // i j k l
+ "movdqu (%0,%3),%%xmm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ // Transpose 2x2
+ "movdqa %%xmm0,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "punpckldq %%xmm1,%%xmm4 \n" // a e b f from row 0, 1
+ "punpckldq %%xmm3,%%xmm5 \n" // i m j n from row 2, 3
+ "punpckhdq %%xmm1,%%xmm6 \n" // c g d h from row 0, 1
+ "punpckhdq %%xmm3,%%xmm7 \n" // k o l p from row 2, 3
+
+ // Transpose 4x4
+ "movdqa %%xmm4,%%xmm0 \n"
+ "movdqa %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm6,%%xmm3 \n"
+ "punpcklqdq %%xmm5,%%xmm0 \n" // a e i m from row 0, 1
+ "punpckhqdq %%xmm5,%%xmm1 \n" // b f j n from row 0, 1
+ "punpcklqdq %%xmm7,%%xmm2 \n" // c g k o from row 2, 3
+ "punpckhqdq %%xmm7,%%xmm3 \n" // d h l p from row 2, 3
+
+ "movdqu %%xmm0,(%1) \n"
+ "lea 16(%1,%4),%1 \n" // dst += stride + 16
+ "movdqu %%xmm1,-16(%1) \n"
+ "movdqu %%xmm2,-16(%1,%4) \n"
+ "movdqu %%xmm3,-16(%1,%4,2) \n"
+ "sub %4,%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+rm"(width) // %2
+ : "r"((ptrdiff_t)(src_stride)), // %3
+ "r"((ptrdiff_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // defined(HAS_TRANSPOSE4X4_32_SSE2)
+
+#if defined(HAS_TRANSPOSE4X4_32_AVX2)
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_AVX2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Main loop transpose 2 blocks of 4x4. Read a column, write a row.
+ "1: \n"
+ "vmovdqu (%0),%%xmm0 \n" // a b c d
+ "vmovdqu (%0,%3),%%xmm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "vmovdqu (%0),%%xmm2 \n" // i j k l
+ "vmovdqu (%0,%3),%%xmm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ "vinserti128 $1,(%0),%%ymm0,%%ymm0 \n" // a b c d
+ "vinserti128 $1,(%0,%3),%%ymm1,%%ymm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "vinserti128 $1,(%0),%%ymm2,%%ymm2 \n" // i j k l
+ "vinserti128 $1,(%0,%3),%%ymm3,%%ymm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ // Transpose 2x2
+ "vpunpckldq %%ymm1,%%ymm0,%%ymm4 \n" // a e b f from row 0, 1
+ "vpunpckldq %%ymm3,%%ymm2,%%ymm5 \n" // i m j n from row 2, 3
+ "vpunpckhdq %%ymm1,%%ymm0,%%ymm6 \n" // c g d h from row 0, 1
+ "vpunpckhdq %%ymm3,%%ymm2,%%ymm7 \n" // k o l p from row 2, 3
+
+ // Transpose 4x4
+ "vpunpcklqdq %%ymm5,%%ymm4,%%ymm0 \n" // a e i m from row 0, 1
+ "vpunpckhqdq %%ymm5,%%ymm4,%%ymm1 \n" // b f j n from row 0, 1
+ "vpunpcklqdq %%ymm7,%%ymm6,%%ymm2 \n" // c g k o from row 2, 3
+ "vpunpckhqdq %%ymm7,%%ymm6,%%ymm3 \n" // d h l p from row 2, 3
+
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 32(%1,%4),%1 \n" // dst += stride + 32
+ "vmovdqu %%ymm1,-32(%1) \n"
+ "vmovdqu %%ymm2,-32(%1,%4) \n"
+ "vmovdqu %%ymm3,-32(%1,%4,2) \n"
+ "sub %4,%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+rm"(width) // %2
+ : "r"((ptrdiff_t)(src_stride)), // %3
+ "r"((ptrdiff_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // defined(HAS_TRANSPOSE4X4_32_AVX2)
+
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
diff --git a/files/source/rotate_lsx.cc b/source/rotate_lsx.cc
index 94a2b91c..94a2b91c 100644
--- a/files/source/rotate_lsx.cc
+++ b/source/rotate_lsx.cc
diff --git a/files/source/rotate_msa.cc b/source/rotate_msa.cc
index 99bdca65..99bdca65 100644
--- a/files/source/rotate_msa.cc
+++ b/source/rotate_msa.cc
diff --git a/files/source/rotate_neon.cc b/source/rotate_neon.cc
index 844df2bf..569a7318 100644
--- a/files/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -410,6 +410,46 @@ void TransposeUVWx8_NEON(const uint8_t* src,
: "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ asm volatile(
+ // Main loop transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "vld4.32 {d0[0], d2[0], d4[0], d6[0]}, [%0], %9 \n"
+ "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1], %9 \n"
+ "vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%2], %9 \n"
+ "vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%3], %9 \n"
+ "subs %8, %8, #4 \n" // w -= 4
+ "vst1.8 {q0}, [%4]! \n"
+ "vst1.8 {q1}, [%5]! \n"
+ "vst1.8 {q2}, [%6]! \n"
+ "vst1.8 {q3}, [%7]! \n"
+ "bgt 1b \n"
+
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(dst1), // %5
+ "+r"(dst2), // %6
+ "+r"(dst3), // %7
+ "+r"(width) // %8
+ : "r"((ptrdiff_t)(src_stride * 4)) // %9
+ : "memory", "cc", "q0", "q1", "q2", "q3");
+}
+
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
diff --git a/files/source/rotate_neon64.cc b/source/rotate_neon64.cc
index 43c15817..95047fa7 100644
--- a/files/source/rotate_neon64.cc
+++ b/source/rotate_neon64.cc
@@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
"4: \n"
- : "=&r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst), // %2
- "+r"(width) // %3
- : "r"(&kVTbl4x4Transpose), // %4
- "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride)) // %6
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst), // %2
+ "+r"(width) // %3
+ : "r"(&kVTbl4x4Transpose), // %4
+ "r"((ptrdiff_t)src_stride), // %5
+ "r"((ptrdiff_t)dst_stride) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
@@ -423,18 +423,57 @@ void TransposeUVWx8_NEON(const uint8_t* src,
"4: \n"
- : "=&r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst_a), // %2
- "+r"(dst_b), // %3
- "+r"(width) // %4
- : "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
- "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
- "r"(&kVTbl4x4TransposeDi) // %8
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst_a), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : "r"((ptrdiff_t)src_stride), // %5
+ "r"((ptrdiff_t)dst_stride_a), // %6
+ "r"((ptrdiff_t)dst_stride_b), // %7
+ "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}
+
+// Transpose 32-bit values (ARGB).
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ asm volatile(
+ // Main loop: transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[0], [%0], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[1], [%1], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[2], [%2], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[3], [%3], %9 \n"
+ "subs %w8, %w8, #4 \n" // w -= 4
+ "st1 {v0.4s}, [%4], 16 \n"
+ "st1 {v1.4s}, [%5], 16 \n"
+ "st1 {v2.4s}, [%6], 16 \n"
+ "st1 {v3.4s}, [%7], 16 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(dst1), // %5
+ "+r"(dst2), // %6
+ "+r"(dst3), // %7
+ "+r"(width) // %8
+ : "r"((ptrdiff_t)(src_stride * 4)) // %9
+ : "memory", "cc", "v0", "v1", "v2", "v3");
+}
+
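Both NEON versions above (ARMv7 and AArch64) lean on the same trick: a ld4/vld4 lane load scatters one source row across four vectors, so after four loads v0..v3 already hold the transposed rows and plain stores finish the job. A hedged intrinsics equivalent of one 4x4 tile (illustrative, not part of the change):

#include <arm_neon.h>
#include <stdint.h>

static inline void Transpose4x4Tile(const uint32_t* r0, const uint32_t* r1,
                                    const uint32_t* r2, const uint32_t* r3,
                                    uint32_t* d0, uint32_t* d1, uint32_t* d2,
                                    uint32_t* d3) {
  uint32x4x4_t v = {{vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
                     vdupq_n_u32(0)}};
  v = vld4q_lane_u32(r0, v, 0);  // source row 0 -> lane 0 of v.val[0..3]
  v = vld4q_lane_u32(r1, v, 1);  // source row 1 -> lane 1
  v = vld4q_lane_u32(r2, v, 2);  // source row 2 -> lane 2
  v = vld4q_lane_u32(r3, v, 3);  // source row 3 -> lane 3
  vst1q_u32(d0, v.val[0]);  // each val[] is now a transposed row
  vst1q_u32(d1, v.val[1]);
  vst1q_u32(d2, v.val[2]);
  vst1q_u32(d3, v.val[3]);
}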
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
diff --git a/files/source/rotate_win.cc b/source/rotate_win.cc
index a78873f8..a78873f8 100644
--- a/files/source/rotate_win.cc
+++ b/source/rotate_win.cc
diff --git a/files/source/row_any.cc b/source/row_any.cc
index 3781a9f2..e574543c 100644
--- a/files/source/row_any.cc
+++ b/source/row_any.cc
@@ -19,7 +19,7 @@ namespace libyuv {
extern "C" {
#endif
-// memset for temp is meant to clear the source buffer (not dest) so that
+// memset for vin is meant to clear the source buffer so that
 // SIMD that reads a full multiple of 16 bytes will not trigger msan errors.
// memset is not needed for production, as the garbage values are processed but
// not used, although there may be edge cases for subsampling.
@@ -35,20 +35,20 @@ extern "C" {
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 5]); \
- memset(temp, 0, 64 * 4); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \
} \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 192, a_buf + n, r); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
- SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 192, a_buf + n, r); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
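Every ANY* wrapper in this file repeats the shape seen in ANY41: run the kernel over the largest multiple of the vector width, then stage the ragged tail through zeroed scratch buffers so the kernel can always consume a full vector without overreading the caller's memory. A distilled single-plane sketch (kernel and names hypothetical; the real wrappers use SIMD_ALIGNED storage):

#include <stdint.h>
#include <string.h>

// Stand-in for a real 16-pixel row kernel.
static void SimdKernel(const uint8_t* src, uint8_t* dst, int width) {
  memcpy(dst, src, width);  // a real kernel would use SIMD here
}

static void AnyWidthSketch(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t vin[128];
  uint8_t vout[128];
  memset(vin, 0, sizeof(vin));  // msan: kernel reads full vectors
  int r = width & 15;           // leftover pixels
  int n = width & ~15;          // multiple-of-16 prefix
  if (n > 0) {
    SimdKernel(src, dst, n);    // fast path over the prefix
  }
  memcpy(vin, src + n, r);      // stage the tail
  SimdKernel(vin, vout, 16);    // one full vector on the staged copy
  memcpy(dst + n, vout, r);     // keep only the r valid pixels
}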
#ifdef HAS_MERGEARGBROW_SSE2
@@ -68,25 +68,25 @@ ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 5]); \
- memset(temp, 0, 64 * 4); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 192, a_buf + n, r); \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 192, a_buf + n, r); \
if (width & 1) { \
- temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
- temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
+ vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1]; \
+ vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
} \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
- yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
- SS(r, DUVSHIFT) * BPP); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants, \
+ MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I444ALPHATOARGBROW_SSSE3
@@ -113,6 +113,9 @@ ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
+#ifdef HAS_I422ALPHATOARGBROW_LSX
+ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
+#endif
#ifdef HAS_I422ALPHATOARGBROW_LASX
ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
#endif
@@ -123,21 +126,20 @@ ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(T vin[16 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 48, a_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants, \
- MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 48, a_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
@@ -190,20 +192,20 @@ ANY41CT(I410AlphaToARGBRow_Any_AVX2,
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
- SIMD_ALIGNED(STYPE temp[16 * 4]); \
- SIMD_ALIGNED(DTYPE out[64]); \
- memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(STYPE vin[16 * 4]); \
+ SIMD_ALIGNED(DTYPE vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \
} \
- memcpy(temp, r_buf + n, r * SBPP); \
- memcpy(temp + 16, g_buf + n, r * SBPP); \
- memcpy(temp + 32, b_buf + n, r * SBPP); \
- memcpy(temp + 48, a_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
- memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
+ memcpy(vin, r_buf + n, r * SBPP); \
+ memcpy(vin + 16, g_buf + n, r * SBPP); \
+ memcpy(vin + 32, b_buf + n, r * SBPP); \
+ memcpy(vin + 48, a_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1); \
+ memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_MERGEAR64ROW_AVX2
@@ -237,22 +239,22 @@ ANY41PT(MergeARGB16To8Row_Any_NEON,
#undef ANY41PT
// Any 3 planes to 1.
-#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
- const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 4]); \
- memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
+ } \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
// Merge functions.
@@ -285,6 +287,9 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOYUY2ROW_LSX
+ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOYUY2ROW_LASX
ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
#endif
@@ -294,6 +299,9 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOUYVYROW_LSX
+ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOUYVYROW_LASX
ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
#endif
@@ -308,28 +316,27 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
 // Note that odd-width replication includes 444 because the Arm
 // implementation subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
-#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
- const uint8_t* v_buf, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 4]); \
- memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- if (width & 1) { \
- temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
- temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \
- } \
- ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \
- MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ if (width & 1) { \
+ vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
+ vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1]; \
+ } \
+ ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
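A worked case of the odd-width branch above: I422ToARGBRow with width = 9 and MASK = 7 gives n = 8 and r = 1, so SS(1, 1) = 1 U and 1 V byte are staged, and the vin[128 + 1] = vin[128 + 0] copy duplicates them. That gives kernels that read chroma in pairs (or subsample 444 to 422 by averaging, per the note above) a defined neighbor for the last sample rather than a memset zero.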
#ifdef HAS_I422TOARGBROW_SSSE3
@@ -359,6 +366,9 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
+#ifdef HAS_I444TORGB24ROW_SSSE3
+ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
+#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
@@ -374,6 +384,9 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
+#ifdef HAS_I444TORGB24ROW_AVX2
+ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31)
+#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
@@ -383,6 +396,9 @@ ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
+#ifdef HAS_I444TORGB24ROW_NEON
+ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7)
+#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
@@ -401,6 +417,14 @@ ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
+#ifdef HAS_I422TOARGBROW_LSX
+ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
+ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
+#endif
#ifdef HAS_I422TOARGBROW_LASX
ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
@@ -420,19 +444,19 @@ ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 3]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(T vin[16 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210TOAR30ROW_SSSE3
@@ -477,19 +501,19 @@ ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
DTYPE* dst_ptr, int depth, int width) { \
- SIMD_ALIGNED(STYPE temp[16 * 3]); \
- SIMD_ALIGNED(DTYPE out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(STYPE vin[16 * 3]); \
+ SIMD_ALIGNED(DTYPE vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \
} \
- memcpy(temp, r_buf + n, r * SBPP); \
- memcpy(temp + 16, g_buf + n, r * SBPP); \
- memcpy(temp + 32, b_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
- memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
+ memcpy(vin, r_buf + n, r * SBPP); \
+ memcpy(vin + 16, g_buf + n, r * SBPP); \
+ memcpy(vin + 32, b_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1); \
+ memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_MERGEXR30ROW_AVX2
@@ -541,18 +565,19 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON,
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
} \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ memcpy(vin, y_buf + n * SBPP, r * SBPP); \
+ memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \
+ ANY_SIMD(vin, vin + 128, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
// Merge functions.
@@ -560,7 +585,10 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON,
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
-ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
+ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15)
+#endif
+#ifdef HAS_MERGEUVROW_AVX512BW
+ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
@@ -611,18 +639,27 @@ ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
+#ifdef HAS_ARGBMULTIPLYROW_LSX
+ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBMULTIPLYROW_LASX
ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
+#ifdef HAS_ARGBADDROW_LSX
+ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBADDROW_LASX
ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
+#ifdef HAS_ARGBSUBTRACTROW_LSX
+ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBSUBTRACTROW_LASX
ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7)
#endif
@@ -664,22 +701,53 @@ ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
#endif
#undef ANY21
+// Any 2 planes to 1 with stride
+// width is measured in source pixels; 4 bytes contain 2 pixels (YUY2).
+#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t vin[32 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int awidth = (width + 1) / 2; \
+ int r = awidth & MASK; \
+ int n = awidth & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2); \
+ } \
+ memcpy(vin, src_yuy2 + n * SBPP, r * SBPP); \
+ memcpy(vin + 32, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP); \
+ ANY_SIMD(vin, 32, vout, MASK + 1); \
+ memcpy(dst_uv + n * BPP, vout, r * BPP); \
+ }
+
+#ifdef HAS_YUY2TONVUVROW_NEON
+ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7)
+#endif
+#ifdef HAS_YUY2TONVUVROW_SSE2
+ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7)
+#endif
+#ifdef HAS_YUY2TONVUVROW_AVX2
+ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
+#endif
+
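Unit bookkeeping example for ANY21S: a YUY2 row of width = 17 pixels has awidth = (17 + 1) / 2 = 9 four-byte units. With MASK = 7, the kernel handles n = 8 units as n * 2 = 16 pixels; the last unit (r = 1) is staged as 4 bytes from each row into vin with a staging stride of 32, and r * BPP = 2 bytes of UV are copied out.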
// Any 2 planes to 1 with yuvconstants
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ memcpy(vin, y_buf + n * SBPP, r * SBPP); \
+ memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \
+ ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
// Biplanar to RGB.
@@ -758,21 +826,21 @@ ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
#undef ANY21C
// Any 2 planes of 16 bit to 1 with yuvconstants
-#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
- void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(T temp[16 * 3]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
- ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
+ void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
+ ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_P210TOAR30ROW_SSSE3
@@ -806,21 +874,22 @@ ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(T vout[16]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
} \
- memcpy(temp, src_u + n, r * BPP); \
- memcpy(temp + 16, src_v + n, r * BPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \
- memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2); \
+ memcpy(vin, src_u + n, r * BPP); \
+ memcpy(vin + 16, src_v + n, r * BPP); \
+ ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1); \
+ memcpy(dst_uv + n * 2, vout, r * BPP * 2); \
}
#ifdef HAS_MERGEUVROW_16_AVX2
-ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
+ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
@@ -829,18 +898,19 @@ ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
 #undef ANY21CT
 #undef ANY21PT
// Any 1 to 1.
-#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 2]); \
- memset(temp, 0, 128); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 128, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_COPYROW_AVX
@@ -931,6 +1001,13 @@ ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
+#if defined(HAS_ARGBTORGB24ROW_LSX)
+ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15)
+ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15)
+ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7)
+ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7)
+#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31)
ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31)
@@ -959,6 +1036,9 @@ ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
+#ifdef HAS_ABGRTOYJROW_AVX2
+ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
@@ -983,6 +1063,9 @@ ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYJROW_SSSE3
+ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
+#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
@@ -992,12 +1075,18 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
+#ifdef HAS_ARGBTOYROW_LSX
+ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
+#endif
#ifdef HAS_ARGBTOYROW_LASX
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYJROW_NEON
+ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
+#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
#endif
@@ -1007,9 +1096,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_LSX
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_RGBATOYJROW_LSX
+ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ABGRTOYJROW_LSX
+ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
+#endif
+#ifdef HAS_RGBATOYJROW_LASX
+ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_ARGBTOYJROW_LASX
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
#endif
+#ifdef HAS_ABGRTOYJROW_LASX
+ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1019,6 +1120,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_BGRATOYROW_LSX
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_BGRATOYROW_LASX
+ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1028,6 +1132,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#ifdef HAS_ABGRTOYROW_LSX
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYROW_LASX
+ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1037,6 +1144,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_RGBATOYROW_LSX
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_RGBATOYROW_LASX
+ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
#endif
@@ -1055,6 +1165,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#ifdef HAS_RGB24TOYROW_LSX
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
#endif
+#ifdef HAS_RGB24TOYJROW_LSX
+ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RGB24TOYJROW_LASX
+ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
+#endif
#ifdef HAS_RGB24TOYROW_LASX
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
#endif
@@ -1079,6 +1195,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
#ifdef HAS_RAWTOYROW_LASX
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
#endif
+#ifdef HAS_RAWTOYJROW_LSX
+ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RAWTOYJROW_LASX
+ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
+#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
@@ -1115,12 +1237,18 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOYROW_LSX
+ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOYROW_LASX
ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_UYVYTOYROW_LSX
+ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_UYVYTOYROW_LASX
ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31)
#endif
@@ -1217,6 +1345,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
+#ifdef HAS_ARGBATTENUATEROW_LSX
+ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7)
+#endif
#ifdef HAS_ARGBATTENUATEROW_LASX
ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15)
#endif
@@ -1238,19 +1369,21 @@ ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
#undef ANY11
// Any 1 to 1 blended. Destination is read, modify, write.
-#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 64, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ memset(vout, 0, sizeof(vout)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ memcpy(vout, dst_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
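ANY11B is the one read-modify-write wrapper, which is why vout is cleared and preloaded as well: the kernel reads the destination. For ARGBCopyAlphaRow with BPP = 4 and, say, width = 10 under MASK = 7 (mask value illustrative): n = 8, r = 2; dst pixels 8..9 are staged into vout, the kernel processes a full 8-pixel vector, and only the 2 valid pixels are written back.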
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
@@ -1270,16 +1403,17 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
// Any 1 to 1 with parameter.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, param, n); \
} \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp, temp + 64, param, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(vin, vout, param, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
#if defined(HAS_I400TOARGBROW_SSE2)
@@ -1355,6 +1489,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA,
2,
7)
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ANY11P(ARGBToRGB565DitherRow_Any_LSX,
+ ARGBToRGB565DitherRow_LSX,
+ const uint32_t,
+ 4,
+ 2,
+ 7)
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
ANY11P(ARGBToRGB565DitherRow_Any_LASX,
ARGBToRGB565DitherRow_LASX,
@@ -1375,6 +1517,9 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
+#ifdef HAS_ARGBSHUFFLEROW_LSX
+ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7)
+#endif
#ifdef HAS_ARGBSHUFFLEROW_LASX
ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
#endif
@@ -1384,17 +1529,17 @@ ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
// Any 1 to 1 with type
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
- SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
- memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]); \
+ SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
- memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
- ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
- memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
+ memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
+ ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1); \
+ memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP); \
}
#ifdef HAS_ARGBTOAR64ROW_SSSE3
@@ -1450,17 +1595,17 @@ ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
- SIMD_ALIGNED(STYPE temp[32]); \
- SIMD_ALIGNED(DTYPE out[32]); \
- memset(temp, 0, 32 * SBPP); /* for msan */ \
+ SIMD_ALIGNED(STYPE vin[32]); \
+ SIMD_ALIGNED(DTYPE vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, scale, n); \
} \
- memcpy(temp, src_ptr + n, r * SBPP); \
- ANY_SIMD(temp, out, scale, MASK + 1); \
- memcpy(dst_ptr + n, out, r * BPP); \
+ memcpy(vin, src_ptr + n, r * SBPP); \
+ ANY_SIMD(vin, vout, scale, MASK + 1); \
+ memcpy(dst_ptr + n, vout, r * BPP); \
}
#ifdef HAS_CONVERT16TO8ROW_SSSE3
@@ -1537,17 +1682,17 @@ ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \
void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
- SIMD_ALIGNED(ST temp[32]); \
- SIMD_ALIGNED(T out[32]); \
- memset(temp, 0, SBPP * 32); /* for msan */ \
+ SIMD_ALIGNED(ST vin[32]); \
+ SIMD_ALIGNED(T vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, param, n); \
} \
- memcpy(temp, src_ptr + n, r * SBPP); \
- ANY_SIMD(temp, out, param, MASK + 1); \
- memcpy(dst_ptr + n, out, r * BPP); \
+ memcpy(vin, src_ptr + n, r * SBPP); \
+ ANY_SIMD(vin, vout, param, MASK + 1); \
+ memcpy(dst_ptr + n, vout, r * BPP); \
}
#ifdef HAS_HALFFLOATROW_SSE2
@@ -1588,20 +1733,22 @@ ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31)
#undef ANY11P16
// Any 1 to 1 with yuvconstants
-#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 2]); \
- memset(temp, 0, 128); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
+
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
@@ -1628,21 +1775,21 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
int width, int source_y_fraction) { \
- SIMD_ALIGNED(TS temps[64 * 2]); \
- SIMD_ALIGNED(TD tempd[64]); \
- memset(temps, 0, sizeof(temps)); /* for msan */ \
+ SIMD_ALIGNED(TS vin[64 * 2]); \
+ SIMD_ALIGNED(TD vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
} \
- memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
if (source_y_fraction) { \
- memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \
+ memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
r * SBPP * sizeof(TS)); \
} \
- ANY_SIMD(tempd, temps, 64, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \
+ ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
}
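For reference, the blend these ANY11I kernels perform is, in the scalar path, roughly dst[i] = (src[i] * (256 - f) + src[i + src_stride] * f + 128) >> 8 with f = source_y_fraction, which is why the second staging memcpy is skipped when f is zero: the kernel never touches the second row.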
#ifdef HAS_INTERPOLATEROW_AVX2
@@ -1682,21 +1829,21 @@ ANY11I(InterpolateRow_16_Any_NEON,
#define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
int scale, int width, int source_y_fraction) { \
- SIMD_ALIGNED(TS temps[64 * 2]); \
- SIMD_ALIGNED(TD tempd[64]); \
- memset(temps, 0, sizeof(temps)); /* for msan */ \
+ SIMD_ALIGNED(TS vin[64 * 2]); \
+ SIMD_ALIGNED(TD vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \
} \
- memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
if (source_y_fraction) { \
- memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \
+ memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
r * SBPP * sizeof(TS)); \
} \
- ANY_SIMD(tempd, temps, 64, scale, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \
+ ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
}
#ifdef HAS_INTERPOLATEROW_16TO8_NEON
@@ -1721,18 +1868,19 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
#undef ANY11IS
// Any 1 to 1 mirror.
-#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr, r* BPP); \
- ANY_SIMD(temp, temp + 64, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
+#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
+ } \
+ memcpy(vin, src_ptr, r* BPP); \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
}
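The mirror wrappers invert the usual tail handling. Worked example with MASK = 31, width = 70: r = 6, n = 64. The SIMD pass mirrors src[6..69] into dst[0..63]; the first 6 source pixels are staged into vin, a full 32-pixel mirror drops them (reversed) at vout[26..31], and the final memcpy reads exactly there via vout + (MASK + 1 - r) * BPP to fill dst[64..69].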
#ifdef HAS_MIRRORROW_AVX2
@@ -1747,6 +1895,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
+#ifdef HAS_MIRRORROW_LSX
+ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31)
+#endif
#ifdef HAS_MIRRORROW_LASX
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#endif
@@ -1762,6 +1913,9 @@ ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#ifdef HAS_MIRRORUVROW_MSA
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
+#ifdef HAS_MIRRORUVROW_LSX
+ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7)
+#endif
#ifdef HAS_MIRRORUVROW_LASX
ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15)
#endif
@@ -1777,6 +1931,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
+#ifdef HAS_ARGBMIRRORROW_LSX
+ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
+#endif
#ifdef HAS_ARGBMIRRORROW_LASX
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif
@@ -1791,15 +1948,14 @@ ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
// Any 1 plane. (memset)
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
- SIMD_ALIGNED(uint8_t temp[64]); \
- memset(temp, 0, 64); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vout[64]); \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, v32, n); \
} \
- ANY_SIMD(temp, v32, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp, r * BPP); \
+ ANY_SIMD(vout, v32, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
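ANY1 drops the memset the other wrappers carry because set kernels only write; there is no partially-initialized read for msan to flag. Example with ARGBSetRow_LSX (BPP = 4, MASK = 3) at width = 10: n = 8 pixels are set in place, the kernel fills 4 pixels of vout, and r * BPP = 8 bytes land at dst_ptr + 8 * 4.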
#ifdef HAS_SETROW_X86
@@ -1823,20 +1979,21 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
#undef ANY1
// Any 1 to 2. Outputs UV planes.
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
- int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_u, dst_v, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
- memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ ANY_SIMD(vin, vout, vout + 128, MASK + 1); \
+ memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \
+ memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \
}
#ifdef HAS_SPLITUVROW_SSE2
@@ -1875,6 +2032,11 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOUV422ROW_LSX
+ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15)
+ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOUV422ROW_LASX
ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31)
ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31)
@@ -1885,17 +2047,18 @@ ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31)
// Any 2 16 bit planes with parameter to 1
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(T vout[16 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
} \
- memcpy(temp, src_uv + n * 2, r * BPP * 2); \
- ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \
- memcpy(dst_u + n, temp + 32, r * BPP); \
- memcpy(dst_v + n, temp + 48, r * BPP); \
+ memcpy(vin, src_uv + n * 2, r * BPP * 2); \
+ ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1); \
+ memcpy(dst_u + n, vout, r * BPP); \
+ memcpy(dst_v + n, vout + 16, r * BPP); \
}
#ifdef HAS_SPLITUVROW_16_AVX2
@@ -1909,21 +2072,22 @@ ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
 #undef ANY12PT
// Any 1 to 3. Outputs RGB planes.
-#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
- uint8_t* dst_b, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 6]); \
- memset(temp, 0, 16 * 3); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
- } \
- memcpy(temp, src_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
- memcpy(dst_r + n, temp + 16 * 3, r); \
- memcpy(dst_g + n, temp + 16 * 4, r); \
- memcpy(dst_b + n, temp + 16 * 5, r); \
+#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 3]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
+ } \
+ memcpy(vin, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1); \
+ memcpy(dst_r + n, vout, r); \
+ memcpy(dst_g + n, vout + 16, r); \
+ memcpy(dst_b + n, vout + 32, r); \
}
#ifdef HAS_SPLITRGBROW_SSSE3
@@ -1946,23 +2110,23 @@ ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
// Any 1 to 4. Outputs ARGB planes.
-#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
- uint8_t* dst_b, uint8_t* dst_a, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 8]); \
- memset(temp, 0, 16 * 4); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
- } \
- memcpy(temp, src_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, temp + 16 * 7, \
- MASK + 1); \
- memcpy(dst_r + n, temp + 16 * 4, r); \
- memcpy(dst_g + n, temp + 16 * 5, r); \
- memcpy(dst_b + n, temp + 16 * 6, r); \
- memcpy(dst_a + n, temp + 16 * 7, r); \
+#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, uint8_t* dst_a, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 4]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
+ } \
+ memcpy(vin, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1); \
+ memcpy(dst_r + n, vout, r); \
+ memcpy(dst_g + n, vout + 16, r); \
+ memcpy(dst_b + n, vout + 32, r); \
+ memcpy(dst_a + n, vout + 48, r); \
}
#ifdef HAS_SPLITARGBROW_SSE2
@@ -1983,25 +2147,26 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 4]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
} \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
- memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
- temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
+ vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
- ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
- memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
- memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
+ ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \
+ memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \
+ memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \
}
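Worked case of the UVSHIFT == 0 replication above: ARGBToUVRow averages 2x2 blocks across two rows, so for width = 17 with MASK = 15 (n = 16, r = 1) the single leftover pixel is duplicated at the end of both staged rows, letting the kernel average a fully defined 2-wide block instead of pairing the last column with memset zeros.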
#ifdef HAS_ARGBTOUVROW_AVX2
@@ -2013,9 +2178,17 @@ ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
+#ifdef HAS_ABGRTOUVJROW_AVX2
+ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVJROW_SSSE3
+ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
+#endif
+#ifdef HAS_ABGRTOUVJROW_SSSE3
+ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
-ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
@@ -2034,12 +2207,18 @@ ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
+#ifdef HAS_ARGBTOUVROW_LSX
+ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVROW_LASX
ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
+#ifdef HAS_ABGRTOUVJROW_NEON
+ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
@@ -2142,12 +2321,18 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
+#ifdef HAS_YUY2TOUVROW_LSX
+ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15)
+#endif
#ifdef HAS_YUY2TOUVROW_LASX
ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
+#ifdef HAS_UYVYTOUVROW_LSX
+ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15)
+#endif
#ifdef HAS_UYVYTOUVROW_LASX
ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
#endif
@@ -2158,24 +2343,25 @@ ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
} \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
- memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
- temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
+ vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
- ANY_SIMD(temp, 128, temp + 256, MASK + 1); \
- memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \
+ ANY_SIMD(vin, 128, vout, MASK + 1); \
+ memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2); \
}
#ifdef HAS_AYUVTOVUROW_NEON
@@ -2184,42 +2370,53 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
-#define ANYDETILE(NAMEANY, ANY_SIMD, MASK) \
- void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \
- int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 2]); \
- memset(temp, 0, 16); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src, src_tile_stride, dst, n); \
- } \
- memcpy(temp, src + (n / 16) * src_tile_stride, r); \
- ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \
- memcpy(dst + n, temp + 16, r); \
+#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
+ SIMD_ALIGNED(T vin[16]); \
+ SIMD_ALIGNED(T vout[16]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src, src_tile_stride, dst, n); \
+ } \
+ memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP); \
+ ANY_SIMD(vin, src_tile_stride, vout, MASK + 1); \
+ memcpy(dst + n, vout, r * BPP); \
}
#ifdef HAS_DETILEROW_NEON
-ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15)
+ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_DETILEROW_SSE2
-ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15)
+ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
+#endif
+#ifdef HAS_DETILEROW_16_NEON
+ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_SSE2
+ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_AVX
+ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
#endif
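The detile wrappers rely on a simple address map: with 16-wide tiles, linear pixel x lives in tile x / 16 at offset x % 16, which is why the tail copy above reads from src + (n / 16) * src_tile_stride. A one-line helper makes the mapping explicit (illustrative only, assuming byte-sized elements):

    /* Linear index -> address within a 16-wide tiled layout (sketch). */
    static inline const uint8_t* TileAddr(const uint8_t* src,
                                          ptrdiff_t src_tile_stride, int x) {
      return src + (x / 16) * src_tile_stride + (x % 16);
    }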
+// DetileSplitUVRow: width is in bytes.
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \
void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 2]); \
- memset(temp, 0, 16 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[16]); \
+ SIMD_ALIGNED(uint8_t vout[8 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n); \
} \
- memcpy(temp, src_uv + (n / 16) * src_tile_stride, r); \
- ANY_SIMD(temp, src_tile_stride, temp + 16, temp + 24, r); \
- memcpy(dst_u + n / 2, temp + 16, (r + 1) / 2); \
- memcpy(dst_v + n / 2, temp + 24, (r + 1) / 2); \
+ memcpy(vin, src_uv + (n / 16) * src_tile_stride, r); \
+ ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r); \
+ memcpy(dst_u + n / 2, vout, (r + 1) / 2); \
+ memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2); \
}
#ifdef HAS_DETILESPLITUVROW_NEON
@@ -2229,6 +2426,33 @@ ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
#endif
+#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK) \
+ void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, \
+ const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, \
+ uint8_t* dst_yuy2, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \
+ n); \
+ } \
+ memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r); \
+ memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \
+ ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r); \
+ memcpy(dst_yuy2 + 2 * n, vout, 2 * r); \
+ }
+
+#ifdef HAS_DETILETOYUY2_NEON
+ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15)
+#endif
+
+#ifdef HAS_DETILETOYUY2_SSE2
+ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15)
+#endif
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_common.cc b/source/row_common.cc
index 83442496..3afc4b4d 100644
--- a/files/source/row_common.cc
+++ b/source/row_common.cc
@@ -21,6 +21,12 @@ namespace libyuv {
extern "C" {
#endif
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA
@@ -42,7 +48,6 @@ extern "C" {
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
-#define LIBYUV_ATTENUATE_DUP 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
@@ -182,12 +187,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_rgb565[0] & 0x1f;
- uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r = src_rgb565[1] >> 3;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 2) | (g >> 4);
- dst_argb[2] = (r << 3) | (r >> 2);
+ uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb565 += 2;
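The (b << 3) | (b >> 2) pattern expands 5 bits to 8 by replicating the high bits into the vacated low bits, preserving both endpoints of the range: 0x1f maps to (0x1f << 3) | (0x1f >> 2) = 0xf8 | 0x07 = 0xff, and 0 stays 0. The 6-bit green channel uses (g << 2) | (g >> 4) for the same reason.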
@@ -199,13 +205,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_argb1555[0] & 0x1f;
- uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t a = src_argb1555[1] >> 7;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 3) | (g >> 2);
- dst_argb[2] = (r << 3) | (r >> 2);
+ uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_argb[3] = -a;
dst_argb += 4;
src_argb1555 += 2;
@@ -217,14 +224,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_argb4444[0] & 0x0f;
- uint8_t g = src_argb4444[0] >> 4;
- uint8_t r = src_argb4444[1] & 0x0f;
- uint8_t a = src_argb4444[1] >> 4;
- dst_argb[0] = (b << 4) | b;
- dst_argb[1] = (g << 4) | g;
- dst_argb[2] = (r << 4) | r;
- dst_argb[3] = (a << 4) | a;
+ uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
+ uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
+ uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
+ uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
+ dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
dst_argb += 4;
src_argb4444 += 2;
}
@@ -274,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
}
}
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_abgr[0] = r;
+ dst_abgr[1] = g;
+ dst_abgr[2] = b;
+ dst_abgr[3] = a;
+ dst_abgr += 4;
+ src_argb += 4;
+ }
+}
+
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_bgra[0] = a;
+ dst_bgra[1] = r;
+ dst_bgra[2] = g;
+ dst_bgra[3] = b;
+ dst_bgra += 4;
+ src_argb += 4;
+ }
+}
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_rgba[0] = a;
+ dst_rgba[1] = b;
+ dst_rgba[2] = g;
+ dst_rgba[3] = r;
+ dst_rgba += 4;
+ src_argb += 4;
+ }
+}
+
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -302,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
}
}
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t a = src_rgba[0];
+ uint8_t b = src_rgba[1];
+ uint8_t g = src_rgba[2];
+ uint8_t r = src_rgba[3];
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
+ dst_argb += 4;
+ src_rgba += 4;
+ }
+}
+
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -320,7 +391,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 2;
uint8_t r0 = src_argb[2] >> 3;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -334,29 +405,31 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
int dither0 = ((const unsigned char*)(&dither4))[x & 3];
int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
- uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
- uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
- uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
- uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11);
+ uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+ uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+ uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+ uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
+ uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
+ uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
- uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+ uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+ uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+ *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -371,8 +444,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 3;
uint8_t r1 = src_argb[6] >> 3;
uint8_t a1 = src_argb[7] >> 7;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
dst_rgb += 4;
src_argb += 8;
}
@@ -381,7 +456,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g0 = src_argb[1] >> 3;
uint8_t r0 = src_argb[2] >> 3;
uint8_t a0 = src_argb[3] >> 7;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+ *(uint16_t*)(dst_rgb) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
}
}
@@ -396,8 +472,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 4;
uint8_t r1 = src_argb[6] >> 4;
uint8_t a1 = src_argb[7] >> 4;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
dst_rgb += 4;
src_argb += 8;
}
@@ -406,18 +484,20 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g0 = src_argb[1] >> 4;
uint8_t r0 = src_argb[2] >> 4;
uint8_t a0 = src_argb[3] >> 4;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+ *(uint16_t*)(dst_rgb) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
}
}
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
+ uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
- uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
+ uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
uint32_t a0 = (src_abgr[3] >> 6);
- *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
+ *(uint32_t*)(dst_ar30) =
+ STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
dst_ar30 += 4;
src_abgr += 4;
}
@@ -430,7 +510,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
uint32_t a0 = (src_argb[3] >> 6);
- *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
+ *(uint32_t*)(dst_ar30) =
+ STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
dst_ar30 += 4;
src_argb += 4;
}
@@ -439,10 +520,14 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = src_argb[0] * 0x0101;
- dst_ar64[1] = src_argb[1] * 0x0101;
- dst_ar64[2] = src_argb[2] * 0x0101;
- dst_ar64[3] = src_argb[3] * 0x0101;
+ uint16_t b = src_argb[0] * 0x0101;
+ uint16_t g = src_argb[1] * 0x0101;
+ uint16_t r = src_argb[2] * 0x0101;
+ uint16_t a = src_argb[3] * 0x0101;
+ dst_ar64[0] = b;
+ dst_ar64[1] = g;
+ dst_ar64[2] = r;
+ dst_ar64[3] = a;
dst_ar64 += 4;
src_argb += 4;
}
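Multiplying by 0x0101 replicates an 8-bit value into both bytes of the 16-bit result, the usual exact 8-to-16 range expansion: 0xab * 0x0101 = 0xabab, equivalent to (v << 8) | v, so 0x00 maps to 0x0000 and 0xff to 0xffff.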
@@ -451,10 +536,14 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_ab64[0] = src_argb[2] * 0x0101;
- dst_ab64[1] = src_argb[1] * 0x0101;
- dst_ab64[2] = src_argb[0] * 0x0101;
- dst_ab64[3] = src_argb[3] * 0x0101;
+ uint16_t b = src_argb[0] * 0x0101;
+ uint16_t g = src_argb[1] * 0x0101;
+ uint16_t r = src_argb[2] * 0x0101;
+ uint16_t a = src_argb[3] * 0x0101;
+ dst_ab64[0] = r;
+ dst_ab64[1] = g;
+ dst_ab64[2] = b;
+ dst_ab64[3] = a;
dst_ab64 += 4;
src_argb += 4;
}
@@ -463,10 +552,14 @@ void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_argb[0] = src_ar64[0] >> 8;
- dst_argb[1] = src_ar64[1] >> 8;
- dst_argb[2] = src_ar64[2] >> 8;
- dst_argb[3] = src_ar64[3] >> 8;
+ uint8_t b = src_ar64[0] >> 8;
+ uint8_t g = src_ar64[1] >> 8;
+ uint8_t r = src_ar64[2] >> 8;
+ uint8_t a = src_ar64[3] >> 8;
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
dst_argb += 4;
src_ar64 += 4;
}
@@ -475,15 +568,35 @@ void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_argb[0] = src_ab64[2] >> 8;
- dst_argb[1] = src_ab64[1] >> 8;
- dst_argb[2] = src_ab64[0] >> 8;
- dst_argb[3] = src_ab64[3] >> 8;
+ uint8_t r = src_ab64[0] >> 8;
+ uint8_t g = src_ab64[1] >> 8;
+ uint8_t b = src_ab64[2] >> 8;
+ uint8_t a = src_ab64[3] >> 8;
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
dst_argb += 4;
src_ab64 += 4;
}
}
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint16_t b = src_ar64[0];
+ uint16_t g = src_ar64[1];
+ uint16_t r = src_ar64[2];
+ uint16_t a = src_ar64[3];
+ dst_ab64[0] = r;
+ dst_ab64[1] = g;
+ dst_ab64[2] = b;
+ dst_ab64[3] = a;
+ dst_ab64 += 4;
+ src_ar64 += 4;
+ }
+}
+
// TODO(fbarchard): Make shuffle compatible with SIMD versions
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
@@ -514,8 +627,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64,
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
- return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
}
#else
// 8 bit
@@ -524,8 +637,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
// 0x7e80) >> 8;
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
- return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
#endif
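Plugging in the extremes shows the studio-range scaling of the 8-bit path: RGBToY(255, 255, 255) = (66*255 + 129*255 + 25*255 + 0x1080) >> 8 = 60324 >> 8 = 235, the BT.601 limited-range white, while black lands at 0x1080 >> 8 = 16.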
@@ -533,29 +646,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
#endif
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
- return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
+ return STATIC_CAST(
+ uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
- return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
+ return STATIC_CAST(
+ uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
}
#endif
@@ -674,28 +789,28 @@ MAKEROWY(RAW, 0, 1, 2, 3)
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
#if defined(LIBYUV_ARGBTOUV_PAVGB)
-static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
-static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
-static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
-static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
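The J (full-range, JPEG-style) luma coefficients sum to exactly 256, so no 16..235 compression occurs: RGBToYJ(255, 255, 255) = (77*255 + 150*255 + 29*255 + 128) >> 8 = 65408 >> 8 = 255, and black maps to 0.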
@@ -782,6 +897,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
+MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
@@ -791,11 +907,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f;
- uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r = src_rgb565[1] >> 3;
- b = (b << 3) | (b >> 2);
- g = (g << 2) | (g >> 4);
- r = (r << 3) | (r >> 2);
+ b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+ r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_y[0] = RGBToY(r, g, b);
src_rgb565 += 2;
dst_y += 1;
@@ -806,11 +923,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb1555[0] & 0x1f;
- uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
- b = (b << 3) | (b >> 2);
- g = (g << 3) | (g >> 2);
- r = (r << 3) | (r >> 2);
+ b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+ r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_y[0] = RGBToY(r, g, b);
src_argb1555 += 2;
dst_y += 1;
@@ -823,9 +941,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
uint8_t b = src_argb4444[0] & 0x0f;
uint8_t g = src_argb4444[0] >> 4;
uint8_t r = src_argb4444[1] & 0x0f;
- b = (b << 4) | b;
- g = (g << 4) | g;
- r = (r << 4) | r;
+ b = STATIC_CAST(uint8_t, (b << 4) | b);
+ g = STATIC_CAST(uint8_t, (g << 4) | g);
+ r = STATIC_CAST(uint8_t, (r << 4) | r);
dst_y[0] = RGBToY(r, g, b);
src_argb4444 += 2;
dst_y += 1;
@@ -840,31 +958,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8_t b0 = src_rgb565[0] & 0x1f;
- uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r0 = src_rgb565[1] >> 3;
- uint8_t b1 = src_rgb565[2] & 0x1f;
- uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
- uint8_t r1 = src_rgb565[3] >> 3;
- uint8_t b2 = next_rgb565[0] & 0x1f;
- uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8_t r2 = next_rgb565[1] >> 3;
- uint8_t b3 = next_rgb565[2] & 0x1f;
- uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
- uint8_t r3 = next_rgb565[3] >> 3;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 2) | (g0 >> 4);
- r0 = (r0 << 3) | (r0 >> 2);
- b1 = (b1 << 3) | (b1 >> 2);
- g1 = (g1 << 2) | (g1 >> 4);
- r1 = (r1 << 3) | (r1 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 2) | (g2 >> 4);
- r2 = (r2 << 3) | (r2 >> 2);
- b3 = (b3 << 3) | (b3 >> 2);
- g3 = (g3 << 2) | (g3 >> 4);
- r3 = (r3 << 3) | (r3 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
+ uint8_t g1 = STATIC_CAST(
+ uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
+ uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+ uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
+ uint8_t g3 = STATIC_CAST(
+ uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
+ uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+ g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
+ r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+ b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+ g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
+ r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -886,19 +1008,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
dst_v += 1;
}
if (width & 1) {
- uint8_t b0 = src_rgb565[0] & 0x1f;
- uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r0 = src_rgb565[1] >> 3;
- uint8_t b2 = next_rgb565[0] & 0x1f;
- uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8_t r2 = next_rgb565[1] >> 3;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 2) | (g0 >> 4);
- r0 = (r0 << 3) | (r0 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 2) | (g2 >> 4);
- r2 = (r2 << 3) | (r2 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -924,31 +1047,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8_t b0 = src_argb1555[0] & 0x1f;
- uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t b1 = src_argb1555[2] & 0x1f;
- uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
- uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
- uint8_t b2 = next_argb1555[0] & 0x1f;
- uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
- uint8_t b3 = next_argb1555[2] & 0x1f;
- uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
- uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 3) | (g0 >> 2);
- r0 = (r0 << 3) | (r0 >> 2);
- b1 = (b1 << 3) | (b1 >> 2);
- g1 = (g1 << 3) | (g1 >> 2);
- r1 = (r1 << 3) | (r1 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 3) | (g2 >> 2);
- r2 = (r2 << 3) | (r2 >> 2);
- b3 = (b3 << 3) | (b3 >> 2);
- g3 = (g3 << 3) | (g3 >> 2);
- r3 = (r3 << 3) | (r3 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
+ uint8_t g1 = STATIC_CAST(
+ uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
+ uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+ uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
+ uint8_t g3 = STATIC_CAST(
+ uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
+ uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+ g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
+ r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+ b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+ g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
+ r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -970,19 +1097,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
dst_v += 1;
}
if (width & 1) {
- uint8_t b0 = src_argb1555[0] & 0x1f;
- uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t b2 = next_argb1555[0] & 0x1f;
- uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 3) | (g0 >> 2);
- r0 = (r0 << 3) | (r0 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 3) | (g2 >> 2);
- r2 = (r2 << 3) | (r2 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -1021,18 +1150,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t g3 = next_argb4444[2] >> 4;
uint8_t r3 = next_argb4444[3] & 0x0f;
- b0 = (b0 << 4) | b0;
- g0 = (g0 << 4) | g0;
- r0 = (r0 << 4) | r0;
- b1 = (b1 << 4) | b1;
- g1 = (g1 << 4) | g1;
- r1 = (r1 << 4) | r1;
- b2 = (b2 << 4) | b2;
- g2 = (g2 << 4) | g2;
- r2 = (r2 << 4) | r2;
- b3 = (b3 << 4) | b3;
- g3 = (g3 << 4) | g3;
- r3 = (r3 << 4) | r3;
+ b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+ g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+ r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+ b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
+ g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
+ r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
+ b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+ g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+ r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
+ b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
+ g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
+ r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -1061,12 +1190,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
- b0 = (b0 << 4) | b0;
- g0 = (g0 << 4) | g0;
- r0 = (r0 << 4) | r0;
- b2 = (b2 << 4) | b2;
- g2 = (g2 << 4) | g2;
- r2 = (r2 << 4) | r2;
+ b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+ g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+ r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+ b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+ g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+ r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -1123,9 +1252,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
int sg = (b * 22 + g * 88 + r * 45) >> 7;
int sr = (b * 24 + g * 98 + r * 50) >> 7;
    // b does not overflow. a is preserved from original.
- dst_argb[0] = sb;
- dst_argb[1] = clamp255(sg);
- dst_argb[2] = clamp255(sr);
+ dst_argb[0] = STATIC_CAST(uint8_t, sb);
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
dst_argb += 4;
}
}
@@ -1154,10 +1283,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb,
int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
a * matrix_argb[15]) >>
6;
- dst_argb[0] = Clamp(sb);
- dst_argb[1] = Clamp(sg);
- dst_argb[2] = Clamp(sr);
- dst_argb[3] = Clamp(sa);
+ dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
+ dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
+ dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
+ dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
src_argb += 4;
dst_argb += 4;
}
@@ -1207,9 +1336,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb,
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
- dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
- dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
- dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
+ dst_argb[0] = STATIC_CAST(
+ uint8_t, (b * scale >> 16) * interval_size + interval_offset);
+ dst_argb[1] = STATIC_CAST(
+ uint8_t, (g * scale >> 16) * interval_size + interval_offset);
+ dst_argb[2] = STATIC_CAST(
+ uint8_t, (r * scale >> 16) * interval_size + interval_offset);
dst_argb += 4;
}
}
@@ -1260,10 +1392,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
const uint32_t a_scale = src_argb1[3];
- dst_argb[0] = SHADE(b, b_scale);
- dst_argb[1] = SHADE(g, g_scale);
- dst_argb[2] = SHADE(r, r_scale);
- dst_argb[3] = SHADE(a, a_scale);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1288,10 +1420,10 @@ void ARGBAddRow_C(const uint8_t* src_argb,
const int g_add = src_argb1[1];
const int r_add = src_argb1[2];
const int a_add = src_argb1[3];
- dst_argb[0] = SHADE(b, b_add);
- dst_argb[1] = SHADE(g, g_add);
- dst_argb[2] = SHADE(r, r_add);
- dst_argb[3] = SHADE(a, a_add);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1315,10 +1447,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb,
const int g_sub = src_argb1[1];
const int r_sub = src_argb1[2];
const int a_sub = src_argb1[3];
- dst_argb[0] = SHADE(b, b_sub);
- dst_argb[1] = SHADE(g, g_sub);
- dst_argb[2] = SHADE(r, r_sub);
- dst_argb[3] = SHADE(a, a_sub);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1431,7 +1563,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// clang-format off
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// Bias values fold in the subtraction of 128 from U and V, the Y bias, and
// rounding. For B and R the bias is negative; for G it is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
@@ -1627,7 +1759,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
#undef MAKEYUVCONSTANTS
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVCoeff[0]; \
int vr = yuvconstants->kUVCoeff[1]; \
@@ -1675,9 +1807,9 @@ static __inline void YuvPixel(uint8_t y,
LOAD_YUV_CONSTANTS;
uint32_t y32 = y * 0x0101;
CALC_RGB16;
- *b = Clamp((int32_t)(b16) >> 6);
- *g = Clamp((int32_t)(g16) >> 6);
- *r = Clamp((int32_t)(r16) >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// Reads 8 bit YUV and leaves result as 16 bit.
@@ -1706,9 +1838,9 @@ static __inline void YuvPixel10_16(uint16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
- uint32_t y32 = y << 6;
- u = clamp255(u >> 2);
- v = clamp255(v >> 2);
+ uint32_t y32 = (y << 6) | (y >> 4);
+ u = STATIC_CAST(uint8_t, clamp255(u >> 2));
+ v = STATIC_CAST(uint8_t, clamp255(v >> 2));
CALC_RGB16;
*b = b16;
*g = g16;
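Replacing y << 6 with (y << 6) | (y >> 4) replicates the top 4 bits of the 10-bit sample into the low bits so the maximum input reaches full 16-bit scale: (1023 << 6) | (1023 >> 4) = 65472 | 63 = 65535 rather than 65472. The 12-bit variant below applies the same idea as (y << 4) | (y >> 8).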
@@ -1725,9 +1857,9 @@ static __inline void YuvPixel12_16(int16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
- uint32_t y32 = y << 4;
- u = clamp255(u >> 4);
- v = clamp255(v >> 4);
+ uint32_t y32 = (y << 4) | (y >> 8);
+ u = STATIC_CAST(uint8_t, clamp255(u >> 4));
+ v = STATIC_CAST(uint8_t, clamp255(v >> 4));
CALC_RGB16;
*b = b16;
*g = g16;
@@ -1747,9 +1879,9 @@ static __inline void YuvPixel10(uint16_t y,
int g16;
int r16;
YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
- *b = Clamp(b16 >> 6);
- *g = Clamp(g16 >> 6);
- *r = Clamp(r16 >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
}
// C reference code that mimics the YUV 12 bit assembly.
@@ -1765,9 +1897,9 @@ static __inline void YuvPixel12(uint16_t y,
int g16;
int r16;
YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
- *b = Clamp(b16 >> 6);
- *g = Clamp(g16 >> 6);
- *r = Clamp(r16 >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
@@ -1781,12 +1913,12 @@ static __inline void YuvPixel16_8(uint16_t y,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y;
- u = clamp255(u >> 8);
- v = clamp255(v >> 8);
+ u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+ v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
- *b = Clamp((int32_t)(b16) >> 6);
- *g = Clamp((int32_t)(g16) >> 6);
- *r = Clamp((int32_t)(r16) >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
@@ -1800,8 +1932,8 @@ static __inline void YuvPixel16_16(uint16_t y,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y;
- u = clamp255(u >> 8);
- v = clamp255(v >> 8);
+ u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+ v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
*b = b16;
*g = g16;
@@ -1815,7 +1947,7 @@ static __inline void YPixel(uint8_t y,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
int yg = yuvconstants->kRGBCoeffBias[0];
int ygb = yuvconstants->kRGBCoeffBias[4];
#else
@@ -1823,9 +1955,10 @@ static __inline void YPixel(uint8_t y,
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
- *b = Clamp(((int32_t)(y1) + ygb) >> 6);
- *g = Clamp(((int32_t)(y1) + ygb) >> 6);
- *r = Clamp(((int32_t)(y1) + ygb) >> 6);
+ uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+ *b = b8;
+ *g = b8;
+ *r = b8;
}
void I444ToARGBRow_C(const uint8_t* src_y,
@@ -1846,6 +1979,23 @@ void I444ToARGBRow_C(const uint8_t* src_y,
}
}
+void I444ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 3; // Advance 1 pixel.
+ }
+}
+
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
@@ -1929,10 +2079,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
- rgb_buf[7] = clamp255(src_a[1] >> 2);
+ rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1942,7 +2092,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
}
}
@@ -1957,7 +2107,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
for (x = 0; x < width; ++x) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
src_y += 1;
src_u += 1;
src_v += 1;
@@ -2283,8 +2433,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
b1 = b1 >> 4;
g1 = g1 >> 4;
r1 = r1 >> 4;
- *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
- *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
+ *(uint16_t*)(dst_argb4444 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
+ *(uint16_t*)(dst_argb4444 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2295,7 +2447,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
- *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
+ *(uint16_t*)(dst_argb4444) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
}
}
@@ -2321,8 +2474,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 3;
r1 = r1 >> 3;
- *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
- *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
+ *(uint16_t*)(dst_argb1555 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
+ *(uint16_t*)(dst_argb1555 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2333,7 +2488,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
- *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
+ *(uint16_t*)(dst_argb1555) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
}
}
@@ -2359,8 +2515,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan
- *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+ *(uint16_t*)(dst_rgb565 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2371,7 +2529,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -2486,8 +2645,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
- *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
+ STATIC_CAST(uint16_t, g0 << 5) |
+ STATIC_CAST(uint16_t, r0 << 11);
+ *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
+ STATIC_CAST(uint16_t, g1 << 5) |
+ STATIC_CAST(uint16_t, r1 << 11);
src_y += 2;
src_uv += 2;
dst_rgb565 += 4; // Advance 2 pixels.
@@ -2497,7 +2660,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
+ STATIC_CAST(uint16_t, g0 << 5) |
+ STATIC_CAST(uint16_t, r0 << 11);
}
}
@@ -2603,6 +2768,19 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
}
}
+void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
+ int x;
+ src += width - 1;
+ for (x = 0; x < width - 1; x += 2) {
+ dst[x] = src[0];
+ dst[x + 1] = src[-1];
+ src -= 2;
+ }
+ if (width & 1) {
+ dst[width - 1] = src[0];
+ }
+}
+
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
int x;
src_uv += (width - 1) << 1;
@@ -2714,6 +2892,21 @@ void DetileRow_C(const uint8_t* src,
}
}
+void DetileRow_16_C(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ int x;
+ for (x = 0; x < width - 15; x += 16) {
+ memcpy(dst, src, 16 * sizeof(uint16_t));
+ dst += 16;
+ src += src_tile_stride;
+ }
+ if (width & 15) {
+ memcpy(dst, src, (width & 15) * sizeof(uint16_t));
+ }
+}
+
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
@@ -2731,6 +2924,51 @@ void DetileSplitUVRow_C(const uint8_t* src_uv,
}
}
+void DetileToYUY2_C(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ for (int x = 0; x < width - 15; x += 16) {
+ for (int i = 0; i < 8; i++) {
+ dst_yuy2[0] = src_y[0];
+ dst_yuy2[1] = src_uv[0];
+ dst_yuy2[2] = src_y[1];
+ dst_yuy2[3] = src_uv[1];
+ dst_yuy2 += 4;
+ src_y += 2;
+ src_uv += 2;
+ }
+ src_y += src_y_tile_stride - 16;
+ src_uv += src_uv_tile_stride - 16;
+ }
+}
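YUY2 packs two pixels into four bytes as Y0 U Y1 V, so each inner iteration emits 2 pixels from 2 luma and 2 chroma bytes, and the 8 iterations cover one 16-pixel tile row before both source pointers jump by their tile strides (minus the 16 bytes already consumed).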
+
+// Unpack MT2T into tiled P010, 64 pixels at a time. MT2T's bitstream is
+// encoded in 80-byte blocks representing 64 pixels each. The first 16 bytes
+// of the block contain the lower 2 bits of every pixel packed together, and
+// the next 64 bytes hold the upper 8 bits of each pixel. The lower bits are
+// packed into 1x4 blocks, whereas the upper bits are packed in normal raster
+// order.
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
+ for (size_t i = 0; i < size; i += 80) {
+ const uint8_t* src_lower_bits = src;
+ const uint8_t* src_upper_bits = src + 16;
+
+ for (int j = 0; j < 4; j++) {
+ for (int k = 0; k < 16; k++) {
+ *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 |
+ (uint16_t)*src_upper_bits << 8 |
+ (uint16_t)*src_upper_bits >> 2;
+ src_upper_bits++;
+ }
+ }
+
+ src += 80;
+ }
+}
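Worked through for one pixel, the unpack builds a P010-style value with the 10 significant bits in the MSBs and the top 6 of them replicated below for rounding: with the upper byte 0xff and the 2 low bits 0x3, the result is (0x3 << 6) | (0xff << 8) | (0xff >> 2) = 0xc0 | 0xff00 | 0x3f = 0xffff. Since each 80-byte block yields 64 uint16 outputs, dst must provide size / 80 * 64 elements.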
+
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2823,10 +3061,10 @@ void MergeAR64Row_C(const uint16_t* src_r,
int shift = 16 - depth;
int max = (1 << depth) - 1;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = ClampMax(src_b[x], max) << shift;
- dst_ar64[1] = ClampMax(src_g[x], max) << shift;
- dst_ar64[2] = ClampMax(src_r[x], max) << shift;
- dst_ar64[3] = ClampMax(src_a[x], max) << shift;
+ dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+ dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+ dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
+ dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
dst_ar64 += 4;
}
}
@@ -2843,10 +3081,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r,
int x;
int shift = depth - 8;
for (x = 0; x < width; ++x) {
- dst_argb[0] = clamp255(src_b[x] >> shift);
- dst_argb[1] = clamp255(src_g[x] >> shift);
- dst_argb[2] = clamp255(src_r[x] >> shift);
- dst_argb[3] = clamp255(src_a[x] >> shift);
+ dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
+ dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
dst_argb += 4;
}
}
@@ -2863,9 +3101,9 @@ void MergeXR64Row_C(const uint16_t* src_r,
int shift = 16 - depth;
int max = (1 << depth) - 1;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = ClampMax(src_b[x], max) << shift;
- dst_ar64[1] = ClampMax(src_g[x], max) << shift;
- dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+ dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+ dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+ dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
dst_ar64[3] = 0xffff;
dst_ar64 += 4;
}
@@ -2882,9 +3120,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r,
int x;
int shift = depth - 8;
for (x = 0; x < width; ++x) {
- dst_argb[0] = clamp255(src_b[x] >> shift);
- dst_argb[1] = clamp255(src_g[x] >> shift);
- dst_argb[2] = clamp255(src_r[x] >> shift);
+ dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
dst_argb[3] = 0xff;
dst_argb += 4;
}
@@ -2930,8 +3168,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
assert(depth <= 16);
int x;
for (x = 0; x < width; ++x) {
- dst_uv[0] = src_u[x] << shift;
- dst_uv[1] = src_v[x] << shift;
+ dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
+ dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
dst_uv += 2;
}
}
@@ -2959,7 +3197,7 @@ void MultiplyRow_16_C(const uint16_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_y[x] = src_y[x] * scale;
+ dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
}
}
@@ -2990,7 +3228,7 @@ void Convert16To8Row_C(const uint16_t* src_y,
assert(scale <= 32768);
for (x = 0; x < width; ++x) {
- dst_y[x] = C16TO8(src_y[x], scale);
+ dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
}
}
@@ -3043,6 +3281,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
}
}
+// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
+void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ // Output a row of UV values, filtering 2 rows of YUY2.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
+ dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
+ src_yuy2 += 4;
+ dst_uv += 2;
+ }
+}
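The (a + b + 1) >> 1 form is a round-to-nearest average, matching the x86 pavgb used by the SIMD paths: (100 + 101 + 1) >> 1 = 101, where a truncating (100 + 101) >> 1 would give 100.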
+
// Copy row of YUY2 UV's (422) into U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
@@ -3138,9 +3391,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[3] = 255u;
fb = src_argb[4 + 0];
@@ -3150,9 +3403,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
bb = src_argb1[4 + 0];
bg = src_argb1[4 + 1];
br = src_argb1[4 + 2];
- dst_argb[4 + 0] = BLEND(fb, bb, a);
- dst_argb[4 + 1] = BLEND(fg, bg, a);
- dst_argb[4 + 2] = BLEND(fr, br, a);
+ dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[4 + 3] = 255u;
src_argb += 8;
src_argb1 += 8;
@@ -3167,9 +3420,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[3] = 255u;
}
}
@@ -3196,12 +3449,7 @@ void BlendPlaneRow_C(const uint8_t* src0,
}
#undef UBLEND
-#if LIBYUV_ATTENUATE_DUP
-// This code mimics the SSSE3 version for better testability.
-#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
-#else
-#define ATTENUATE(f, a) (f * a + 128) >> 8
-#endif
+#define ATTENUATE(f, a) (f * a + 255) >> 8
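The +255 bias makes attenuation by full alpha an exact identity, which the old (f * a + 128) >> 8 form was not: ATTENUATE(255, 255) = (255*255 + 255) >> 8 = 65280 >> 8 = 255, whereas (255*255 + 128) >> 8 = 65153 >> 8 = 254 would darken opaque white by one step.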
// Multiply source RGB by alpha and store to destination.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
@@ -3214,7 +3462,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
b = src_argb[4];
g = src_argb[5];
r = src_argb[6];
@@ -3222,7 +3470,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[4] = ATTENUATE(b, a);
dst_argb[5] = ATTENUATE(g, a);
dst_argb[6] = ATTENUATE(r, a);
- dst_argb[7] = a;
+ dst_argb[7] = STATIC_CAST(uint8_t, a);
src_argb += 8;
dst_argb += 8;
}
@@ -3235,7 +3483,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
}
}
#undef ATTENUATE
@@ -3307,10 +3555,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
// Clamping should not be necessary but is free in assembly.
- dst_argb[0] = UNATTENUATE(b, ia);
- dst_argb[1] = UNATTENUATE(g, ia);
- dst_argb[2] = UNATTENUATE(r, ia);
- dst_argb[3] = a;
+ dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
+ dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
+ dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
src_argb += 4;
dst_argb += 4;
}
@@ -3344,12 +3592,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl,
int i;
assert(area != 0);
- ooa = 1.0f / area;
+ ooa = 1.0f / STATIC_CAST(float, area);
for (i = 0; i < count; ++i) {
- dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
- dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
- dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
- dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+ dst[0] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) *
+ ooa);
+ dst[1] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) *
+ ooa);
+ dst[2] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) *
+ ooa);
+ dst[3] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) *
+ ooa);
dst += 4;
tl += 4;
bl += 4;
@@ -3407,7 +3663,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv,
int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale);
+ dst_uv[x] = STATIC_CAST(
+ uint8_t,
+ C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
}
}
@@ -3433,8 +3691,9 @@ void InterpolateRow_C(uint8_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] =
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[0] = STATIC_CAST(
+ uint8_t,
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
++src_ptr;
++src_ptr1;
++dst_ptr;
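The two fractions sum to 256 (y0_fraction is 256 - source_y_fraction earlier in the function), so this is an 8.8 fixed-point lerp with +128 for rounding. A quarter step from 100 toward 200 (y1_fraction = 64): (100*192 + 200*64 + 128) >> 8 = 32128 >> 8 = 125.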
@@ -3463,8 +3722,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] =
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[0] = STATIC_CAST(
+ uint16_t,
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
++src_ptr;
++src_ptr1;
++dst_ptr;
@@ -3501,9 +3761,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] = C16TO8(
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
- scale);
+ dst_ptr[0] = STATIC_CAST(
+ uint8_t,
+ C16TO8(
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
+ scale));
src_ptr += 1;
src_ptr1 += 1;
dst_ptr += 1;
@@ -3615,10 +3877,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb,
dr += poly[14] * r3;
da += poly[15] * a3;
- dst_argb[0] = Clamp((int32_t)(db));
- dst_argb[1] = Clamp((int32_t)(dg));
- dst_argb[2] = Clamp((int32_t)(dr));
- dst_argb[3] = Clamp((int32_t)(da));
+ dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
+ dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
+ dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
+ dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
src_argb += 4;
dst_argb += 4;
}
@@ -4023,6 +4285,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+void I444ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
+#else
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth;
+ src_v += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
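The new AVX2 wrapper above is the usual libyuv composition trick: instead of a dedicated I444-to-RGB24 kernel, it converts up to MAXTWIDTH pixels at a time into an aligned ARGB scratch row, then repacks with an existing ARGB-to-RGB24 kernel. A scalar sketch of the same pattern (kernel names are hypothetical stand-ins; the yuvconstants parameter is omitted for brevity):

    #include <stdint.h>

    enum { kTileWidth = 4096 }; /* stand-in for MAXTWIDTH */

    /* Compose any I444-to-ARGB row kernel with any ARGB-to-RGB24 row kernel. */
    static void I444ToRGB24Row_Composed(
        const uint8_t* y, const uint8_t* u, const uint8_t* v,
        uint8_t* dst_rgb24, int width,
        void (*i444_to_argb)(const uint8_t*, const uint8_t*, const uint8_t*,
                             uint8_t*, int),
        void (*argb_to_rgb24)(const uint8_t*, uint8_t*, int)) {
      uint8_t row[kTileWidth * 4]; /* intermediate ARGB pixels */
      while (width > 0) {
        int tw = width < kTileWidth ? width : kTileWidth;
        i444_to_argb(y, u, v, row, tw);    /* stage 1: planar 4:4:4 to ARGB */
        argb_to_rgb24(row, dst_rgb24, tw); /* stage 2: drop alpha, 3 bytes/px */
        y += tw; /* 4:4:4 means one U and one V sample per Y */
        u += tw;
        v += tw;
        dst_rgb24 += tw * 3;
        width -= tw;
      }
    }
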
#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
@@ -4164,8 +4452,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
int i;
for (i = 0; i < width; ++i) {
- *dst++ =
- (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
+ *dst++ = STATIC_CAST(
+ uint16_t,
+ (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
++src;
}
}
@@ -4325,6 +4614,8 @@ void HalfMergeUVRow_C(const uint8_t* src_u,
}
}
+#undef STATIC_CAST
+
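For reference, the STATIC_CAST being #undef'd here is presumably libyuv's usual portability macro (defined above this hunk, outside the diff context), along these lines; the C build gets a plain cast while C++ gets the checked static_cast, and it exists to silence implicit-conversion warnings on the narrowing stores added throughout this file:

    #ifdef __cplusplus
    #define STATIC_CAST(type, expr) static_cast<type>(expr)
    #else
    #define STATIC_CAST(type, expr) (type)(expr)
    #endif
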
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_gcc.cc b/source/row_gcc.cc
index dce8c439..d8074987 100644
--- a/files/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -27,6 +27,9 @@ static const uvec8 kARGBToY = {25u, 129u, 66u, 0u, 25u, 129u, 66u, 0u,
static const uvec8 kARGBToYJ = {29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u,
29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u};
+static const uvec8 kABGRToYJ = {77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u,
+ 77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u};
+
static const uvec8 kRGBAToYJ = {0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u,
0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u};
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
@@ -39,12 +42,18 @@ static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0,
127, -84, -43, 0, 127, -84, -43, 0};
+static const vec8 kABGRToUJ = {-43, -84, 127, 0, -43, -84, 127, 0,
+ -43, -84, 127, 0, -43, -84, 127, 0};
+
static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0,
-18, -94, 112, 0, -18, -94, 112, 0};
static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0,
-20, -107, 127, 0, -20, -107, 127, 0};
+static const vec8 kABGRToVJ = {127, -107, -20, 0, 127, -107, -20, 0,
+ 127, -107, -20, 0, 127, -107, -20, 0};
+
// Constants for BGRA
static const uvec8 kBGRAToY = {0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u,
0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u};
@@ -729,7 +738,7 @@ void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"movd %3,%%xmm6 \n"
@@ -777,7 +786,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"vbroadcastss %3,%%xmm6 \n"
@@ -1201,6 +1210,7 @@ void ARGBToAR64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
@@ -1228,6 +1238,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
@@ -1256,6 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -1284,6 +1296,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -1398,6 +1411,24 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_SSSE3
+#ifdef HAS_ABGRTOYJROW_SSSE3
+// Convert 16 ABGR pixels (64 bytes) to 16 YJ values.
+// Same as ABGRToYRow but different coefficients, no add 16.
+void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN RGBTOY(xmm5)
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToYJ), // %3
+ "m"(kSub128) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ABGRTOYJROW_SSSE3
+
#ifdef HAS_RGBATOYJROW_SSSE3
// Convert 16 RGBA pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16.
@@ -1416,7 +1447,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
}
#endif // HAS_RGBATOYJROW_SSSE3
-#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ABGRTOYROW_AVX2) || \
+ defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
// vpermd for vphaddw + vpackuswb vpermd.
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
#endif
@@ -1429,9 +1461,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
- "vmovdqu %6,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm7)
+      "vmovdqu %6,%%ymm6 \n"
+
+      LABELALIGN RGBTOY_AVX2(ymm7)
+      "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1451,9 +1482,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
- "vmovdqu %6,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm7)
+      "vmovdqu %6,%%ymm6 \n"
+
+      LABELALIGN RGBTOY_AVX2(ymm7)
+      "vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1472,9 +1502,8 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm5)
+      "vmovdqu %5,%%ymm6 \n"
+
+      LABELALIGN RGBTOY_AVX2(ymm5)
+      "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1486,15 +1515,32 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_AVX2
+#ifdef HAS_ABGRTOYJROW_AVX2
+// Convert 32 ABGR pixels (128 bytes) to 32 YJ values.
+void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+      "vmovdqu %5,%%ymm6 \n"
+
+      LABELALIGN RGBTOY_AVX2(ymm5)
+      "vzeroupper \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToYJ), // %3
+ "m"(kSub128), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ABGRTOYJROW_AVX2
+
#ifdef HAS_RGBATOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(
+      "vmovdqu %5,%%ymm6 \n"
+
+      LABELALIGN RGBTOY_AVX2(
ymm5) "vzeroupper \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
@@ -1571,11 +1617,15 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVROW_SSSE3
-#ifdef HAS_ARGBTOUVROW_AVX2
+#if defined(HAS_ARGBTOUVROW_AVX2) || defined(HAS_ABGRTOUVROW_AVX2) || \
+ defined(HAS_ARGBTOUVJROW_AVX2) || defined(HAS_ABGRTOUVJROW_AVX2)
// vpshufb for vphaddw + vpackuswb packed to shorts.
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
+#endif
+
+#if defined(HAS_ARGBTOUVROW_AVX2)
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1765,6 +1815,71 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_AVX2
+// TODO(fbarchard): Pass kABGRToVJ / kABGRToUJ as matrix
+#ifdef HAS_ABGRTOUVJROW_AVX2
+void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kSub128), // %5
+ "m"(kABGRToVJ), // %6
+ "m"(kABGRToUJ), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ABGRTOUVJROW_AVX2
+
#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
@@ -1831,6 +1946,72 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_SSSE3
+#ifdef HAS_ABGRTOUVJROW_SSSE3
+void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kABGRToVJ), // %5
+ "m"(kABGRToUJ), // %6
+ "m"(kSub128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+#endif // HAS_ABGRTOUVJROW_SSSE3
+
#ifdef HAS_ARGBTOUV444ROW_SSSE3
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
@@ -2153,9 +2334,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422 10 bit, upsample to 8 UV
-// TODO(fbarchard): Consider shufb to replace pack/unpack
-// TODO(fbarchard): Consider pmulhuw to replace psraw
-// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
#define READYUV210 \
"movq (%[u_buf]),%%xmm3 \n" \
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
@@ -2165,7 +2343,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
#define READYUVA210 \
@@ -2177,7 +2358,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
@@ -2196,7 +2380,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 444 10 bit. With 8 Alpha.
@@ -2211,7 +2398,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
- "psllw $0x6,%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
+ "psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
@@ -2228,7 +2418,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
- "psllw $0x4,%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
+ "psllw $4,%%xmm4 \n" \
+ "psrlw $8,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
@@ -2399,6 +2592,20 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu %%xmm0,0x10(%[dst_rgba]) \n" \
"lea 0x20(%[dst_rgba]),%[dst_rgba] \n"
+// Store 8 RGB24 values.
+#define STORERGB24 \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklbw %%xmm2,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm1 \n" \
+ "pshufb %%xmm5,%%xmm0 \n" \
+ "pshufb %%xmm6,%%xmm1 \n" \
+ "palignr $0xc,%%xmm0,%%xmm1 \n" \
+ "movq %%xmm0,(%[dst_rgb24]) \n" \
+ "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" \
+ "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
+
// Store 8 AR30 values.
#define STOREAR30 \
"psraw $0x4,%%xmm0 \n" \
@@ -2508,17 +2715,43 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
"1: \n"
READYUV422
YUVTORGB(yuvconstants)
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0,(%[dst_rgb24]) \n"
- "movdqu %%xmm1,0x8(%[dst_rgb24]) \n"
- "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
+ STORERGB24
+ "subl $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
+#if defined(__i386__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+ [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+
+void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
+ "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
+ "sub %[u_buf],%[v_buf] \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV444
+ YUVTORGB(yuvconstants)
+ STORERGB24
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -3209,7 +3442,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210, upsample to 16 UV. With 16 Alpha.
@@ -3224,7 +3459,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
@@ -3242,7 +3479,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 212 12 bit, upsample to 16 UV
@@ -3257,7 +3496,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $0x4,%%ymm4,%%ymm4 \n" \
+ "vpsllw $4,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $8,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from 410. With 16 Alpha.
@@ -3271,7 +3512,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
@@ -4785,6 +5028,84 @@ void DetileRow_SSE2(const uint8_t* src,
}
#endif // HAS_DETILEROW_SSE2
+#ifdef HAS_DETILEROW_16_SSE2
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride) // %3
+ : "cc", "memory", "xmm0", "xmm1");
+}
+#endif // HAS_DETILEROW_16_SSE2
+
+#ifdef HAS_DETILEROW_16_AVX
+void DetileRow_16_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea (%0,%3,2),%0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride) // %3
+ : "cc", "memory", "xmm0");
+}
+#endif // HAS_DETILEROW_16_AVX
+
+#ifdef HAS_DETILETOYUY2_SSE2
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // Load 16 Y
+ "sub $0x10,%3 \n"
+ "lea (%0,%4),%0 \n"
+ "movdqu (%1),%%xmm1 \n" // Load 8 UV
+ "lea (%1,%5),%1 \n"
+ "movdqu %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm2 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "xmm0", "xmm1", "xmm2" // Clobber list
+ );
+}
+#endif
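
In a tiled layout a display row is scattered across tiles: each group of pixels sits at the start of its own tile, so the detile kernels copy one group and then hop src_tile_stride elements to the next tile. The 16-bit variant, modeled in scalar C (assuming width is a multiple of 16, as the SIMD path does):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void DetileRow_16_Scalar(const uint16_t* src,
                                    ptrdiff_t src_tile_stride,
                                    uint16_t* dst,
                                    int width) {
      int i;
      for (i = 0; i < width; i += 16) {
        memcpy(dst + i, src, 16 * sizeof(uint16_t)); /* one 16-pixel group */
        src += src_tile_stride; /* jump to the start of the next tile */
      }
    }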
+
#ifdef HAS_DETILESPLITUVROW_SSSE3
// TODO(greenjustin): Look into generating these constants instead of loading
// them since this can cause branch mispredicts for fPIC code on 32-bit
@@ -4821,36 +5142,59 @@ void DetileSplitUVRow_SSSE3(const uint8_t* src_uv,
}
#endif // HAS_DETILESPLITUVROW_SSSE3
+#ifdef HAS_MERGEUVROW_AVX512BW
+void MergeUVRow_AVX512BW(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile("sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%0),%%zmm0 \n"
+ "vpmovzxbw 0x00(%0,%1,1),%%zmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpsllw $0x8,%%zmm1,%%zmm1 \n"
+ "vporq %%zmm0,%%zmm1,%%zmm2 \n"
+ "vmovdqu64 %%zmm2,(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_MERGEUVROW_AVX512BW
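
This new AVX512BW kernel, and the AVX2 rewrite just below it, both use a zero-extend/shift/or idiom in place of the old punpckl/punpckh pair: widening the U bytes to words, shifting the widened V left 8 and OR-ing produces the interleaved little-endian U,V byte pairs directly, with no lane-crossing vextractf128 fixups. Scalar equivalent of one output pair:

    #include <assert.h>
    #include <stdint.h>

    static uint16_t MergeUV(uint8_t u, uint8_t v) {
      return (uint16_t)(u | (v << 8)); /* little-endian word: U byte, V byte */
    }

    int main(void) {
      assert(MergeUV(0x11, 0x22) == 0x2211); /* lands in memory as 11 22 */
      return 0;
    }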
+
#ifdef HAS_MERGEUVROW_AVX2
void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width) {
- asm volatile(
-
- "sub %0,%1 \n"
+ asm volatile("sub %0,%1 \n"
- LABELALIGN
+ LABELALIGN
"1: \n"
- "vmovdqu (%0),%%ymm0 \n"
- "vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
- "lea 0x20(%0),%0 \n"
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
- "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm2,(%2) \n"
- "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
- "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
- "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
- "lea 0x40(%2),%2 \n"
- "sub $0x20,%3 \n"
+ "vpmovzxbw (%0),%%ymm0 \n"
+ "vpmovzxbw 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpsllw $0x8,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm0,%%ymm1,%%ymm2 \n"
+ "vmovdqu %%ymm2,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_AVX2
@@ -4859,11 +5203,9 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width) {
- asm volatile(
+ asm volatile("sub %0,%1 \n"
- "sub %0,%1 \n"
-
- LABELALIGN
+ LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x00(%0,%1,1),%%xmm1 \n"
@@ -4876,12 +5218,12 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
"lea 0x20(%2),%2 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_SSE2
@@ -4891,37 +5233,35 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
uint16_t* dst_uv,
int depth,
int width) {
- depth = 16 - depth;
// clang-format off
asm volatile (
"vmovd %4,%%xmm3 \n"
+ "vmovd %5,%%xmm4 \n"
+
"sub %0,%1 \n"
+ // 8 pixels per loop.
- // 16 pixels per loop.
- LABELALIGN
+ LABELALIGN
"1: \n"
- "vmovdqu (%0),%%ymm0 \n"
- "vmovdqu (%0,%1,1),%%ymm1 \n"
- "add $0x20,%0 \n"
-
+ "vpmovzxwd (%0),%%ymm0 \n"
+ "vpmovzxwd 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x10(%0),%0 \n"
"vpsllw %%xmm3,%%ymm0,%%ymm0 \n"
- "vpsllw %%xmm3,%%ymm1,%%ymm1 \n"
- "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates
- "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm2,(%2) \n"
- "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
- "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
- "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
- "add $0x40,%2 \n"
- "sub $0x10,%3 \n"
+ "vpslld %%xmm4,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm0,%%ymm1,%%ymm2 \n"
+ "vmovdqu %%ymm2,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- : "r"(depth) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ : "r"(16 - depth), // %4
+ "r"(32 - depth) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
// clang-format on
}
#endif // HAS_MERGEUVROW_AVX2
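
For the 16-bit merge, passing both precomputed shift counts (16 - depth for U within its word, 32 - depth for V into the high word) lets one vpsllw/vpslld/vpor sequence MSB-align each sample and interleave in a single pass, again avoiding the old mutating unpack plus vextractf128 dance. Scalar model of one output dword, with a spot check for depth 10:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t MergeUV16(uint16_t u, uint16_t v, int depth) {
      uint32_t u_al = (uint16_t)(u << (16 - depth)); /* U MSB-aligned, low word */
      uint32_t v_al = (uint32_t)v << (32 - depth);   /* V MSB-aligned, high word */
      return u_al | v_al;
    }

    int main(void) {
      /* depth 10: full-scale 0x3FF becomes 0xFFC0 in each half */
      assert(MergeUV16(0x3FF, 0x3FF, 10) == 0xFFC0FFC0u);
      return 0;
    }
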
@@ -5127,7 +5467,6 @@ void Convert16To8Row_AVX2(const uint16_t* src_y,
// 512 = 9 bits
// 1024 = 10 bits
// 4096 = 12 bits
-// TODO(fbarchard): reduce to SSE2
void Convert8To16Row_SSE2(const uint8_t* src_y,
uint16_t* dst_y,
int scale,
@@ -6178,6 +6517,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
"lea 0x40(%1),%1 \n"
"sub $0x40,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
@@ -6461,6 +6801,33 @@ void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
+void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_uv), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(stride_yuy2)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+}
+
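YUY2 packs pixels as Y0,U,Y1,V, so the chroma bytes already sit in the odd byte of each 16-bit word; averaging two rows with pavgb and then psrlw $8 plus packuswb keeps exactly those bytes, producing the interleaved NV12-style UV row in one pass. Per output pair the arithmetic is:

    #include <stdint.h>

    /* One UV pair from two vertically adjacent YUY2 quads (Y0 U Y1 V).
       pavgb rounds, hence the +1. */
    static void Yuy2ToNvuvPair(const uint8_t* row0,
                               const uint8_t* row1,
                               uint8_t* dst_uv) {
      dst_uv[0] = (uint8_t)((row0[1] + row1[1] + 1) >> 1); /* U */
      dst_uv[1] = (uint8_t)((row0[3] + row1[3] + 1) >> 1); /* V */
    }
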
void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
int stride_yuy2,
uint8_t* dst_u,
@@ -6661,6 +7028,35 @@ void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
+void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%3,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%3,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_uv), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(stride_yuy2)) // %3
+ : "memory", "cc", "xmm0", "xmm1");
+}
+
void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
int stride_yuy2,
uint8_t* dst_u,
@@ -7045,93 +7441,106 @@ void BlendPlaneRow_AVX2(const uint8_t* src0,
#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha.
-static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
- 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u};
-static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
+static const vec8 kAttenuateShuffle = {6, -128, 6, -128, 6, -128,
+ -128, -128, 14, -128, 14, -128,
+ 14, -128, -128, -128};
+
// Attenuate 4 pixels at a time.
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "pslld $0x18,%%xmm3 \n"
"movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
+ "pcmpeqb %%xmm5,%%xmm5 \n"
+ "pslld $0x18,%%xmm5 \n"
+ "pxor %%xmm6,%%xmm6 \n"
+ "pcmpeqb %%xmm7,%%xmm7 \n"
+ "punpcklbw %%xmm6,%%xmm7 \n"
+ "sub %0,%1 \n"
// 4 pixel loop.
LABELALIGN
"1: \n"
- "movdqu (%0),%%xmm0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "movdqu (%0),%%xmm1 \n"
- "punpcklbw %%xmm1,%%xmm1 \n"
- "pmulhuw %%xmm1,%%xmm0 \n"
- "movdqu (%0),%%xmm1 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "movdqu (%0),%%xmm2 \n"
- "punpckhbw %%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqu (%0),%%xmm2 \n"
- "lea 0x10(%0),%0 \n"
- "pand %%xmm3,%%xmm2 \n"
+ "movdqu (%0),%%xmm6 \n"
+ "movdqa %%xmm6,%%xmm0 \n"
+ "movdqa %%xmm6,%%xmm1 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "punpckhbw %%xmm5,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "movdqa %%xmm1,%%xmm3 \n"
+ "pshufb %%xmm4,%%xmm2 \n" // a,a,a,0
+ "pshufb %%xmm4,%%xmm3 \n"
+ "pmullw %%xmm2,%%xmm0 \n" // rgb * alpha
+ "pmullw %%xmm3,%%xmm1 \n"
+ "paddw %%xmm7,%%xmm0 \n" // + 255
+ "paddw %%xmm7,%%xmm1 \n"
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0,(%1) \n"
- "lea 0x10(%1),%1 \n"
+ "pand %%xmm5,%%xmm6 \n"
+ "por %%xmm6,%%xmm0 \n"
+ "movdqu %%xmm0,(%0,%1) \n"
+ "lea 0x10(%0),%0 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAlpha0), // %3
- "m"(kShuffleAlpha1) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kAttenuateShuffle) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBATTENUATEROW_SSSE3
#ifdef HAS_ARGBATTENUATEROW_AVX2
+
// Shuffle table duplicating alpha.
-static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
- 128u, 128u, 14u, 15u, 14u, 15u,
- 14u, 15u, 128u, 128u};
+static const lvec8 kAttenuateShuffle_AVX2 = {
+ 6, -128, 6, -128, 6, -128, -128, -128, 14, -128, 14,
+ -128, 14, -128, -128, -128, 22, -128, 22, -128, 22, -128,
+ -128, -128, 30, -128, 30, -128, 30, -128, -128, -128};
+
// Attenuate 8 pixels at a time.
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
- "vbroadcastf128 %3,%%ymm4 \n"
+ "vmovdqa %3,%%ymm4 \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpslld $0x18,%%ymm5,%%ymm5 \n"
+ "vpxor %%ymm6,%%ymm6,%%ymm6 \n"
+ "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n"
+ "vpunpcklbw %%ymm6,%%ymm7,%%ymm7 \n"
"sub %0,%1 \n"
// 8 pixel loop.
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm6 \n"
- "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
- "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
+ "vpunpcklbw %%ymm5,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm5,%%ymm6,%%ymm1 \n"
"vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
"vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpand %%ymm5,%%ymm6,%%ymm6 \n"
+ "vpmullw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm7,%%ymm1,%%ymm1 \n"
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpor %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpand %%ymm5,%%ymm6,%%ymm1 \n"
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,0x00(%0,%1,1) \n"
"lea 0x20(%0),%0 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kShuffleAlpha_AVX2) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kAttenuateShuffle_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif // HAS_ARGBATTENUATEROW_AVX2
diff --git a/files/source/row_lasx.cc b/source/row_lasx.cc
index 7dd18f40..be85022e 100644
--- a/files/source/row_lasx.cc
+++ b/source/row_lasx.cc
@@ -543,8 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y,
__m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
__m256i vec_ubvr, vec_ugvg;
__m256i const_0x80 = __lasx_xvldi(0x80);
- __m256i alpha = {0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000,
- 0xF000F000F000F000};
+ __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000,
+ 0xF000F000F000F000, 0xF000F000F000F000};
__m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0,
0x00F000F000F000F0};
@@ -595,8 +595,8 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y,
__m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
__m256i vec_ubvr, vec_ugvg;
__m256i const_0x80 = __lasx_xvldi(0x80);
- __m256i alpha = {0x8000800080008000, 0x8000800080008000, 0x8000800080008000,
- 0x8000800080008000};
+ __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000,
+ 0x8000800080008000, 0x8000800080008000};
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb);
vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr);
@@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
}
}
-void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
- __m256i tmp0, tmp1, dst0;
- __m256i const_19 = __lasx_xvldi(0x19);
- __m256i const_42 = __lasx_xvldi(0x42);
- __m256i const_81 = __lasx_xvldi(0x81);
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
- 0x0000000700000003};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
- src_argb0, 96, src0, src1, src2, src3);
- vec0 = __lasx_xvpickev_b(src1, src0);
- vec1 = __lasx_xvpickev_b(src3, src2);
- vec2 = __lasx_xvpickod_b(src1, src0);
- vec3 = __lasx_xvpickod_b(src3, src2);
- tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
- tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
- tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
- tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
- tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
- tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
- dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
- dst0 = __lasx_xvperm_w(dst0, control);
- __lasx_xvst(dst0, dst_y, 0);
- src_argb0 += 128;
- dst_y += 32;
- }
-}
-
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
int src_stride_argb,
uint8_t* dst_u,
@@ -833,8 +799,8 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0,
0x0009000900090009, 0x0009000900090009};
__m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
0x0000000700000003};
- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
@@ -1071,8 +1037,8 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb,
__m256i const_38 = __lasx_xvldi(38);
__m256i const_94 = __lasx_xvldi(94);
__m256i const_18 = __lasx_xvldi(18);
- __m256i const_0x8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
__m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
0x0000000700000003};
for (x = 0; x < len; x++) {
@@ -1216,7 +1182,7 @@ void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
int x;
int len = width / 16;
@@ -1643,8 +1609,8 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555,
__m256i const_38 = __lasx_xvldi(0x413);
__m256i const_94 = __lasx_xvldi(0x42F);
__m256i const_18 = __lasx_xvldi(0x409);
- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0,
@@ -1760,8 +1726,8 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
__m256i const_38 = __lasx_xvldi(0x413);
__m256i const_94 = __lasx_xvldi(0x42F);
__m256i const_18 = __lasx_xvldi(0x409);
- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0,
@@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
}
}
-void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1, reg2, dst0;
- __m256i const_129 = __lasx_xvldi(129);
- __m256i const_br = {0x4219421942194219, 0x4219421942194219,
- 0x4219421942194219, 0x4219421942194219};
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
- 0x17151412110F0E0C};
- __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
- 0x0F0D0C0A09070604};
- __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
- 0x001600130010000D};
- __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
- 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- reg0 = __lasx_xvld(src_rgb24, 0);
- reg1 = __lasx_xvld(src_rgb24, 32);
- reg2 = __lasx_xvld(src_rgb24, 64);
- src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
- src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
- src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
- tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
- tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
- tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
- tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
- reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_rgb24 += 96;
- }
-}
-
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -1869,8 +1793,8 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
__m256i const_38 = __lasx_xvldi(0x413);
__m256i const_94 = __lasx_xvldi(0x42F);
__m256i const_18 = __lasx_xvldi(0x409);
- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
__m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18,
0x15120F0C09060300, 0x00000000001E1B18};
__m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
}
}
-void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1, reg2, dst0;
- __m256i const_129 = __lasx_xvldi(129);
- __m256i const_br = {0x1942194219421942, 0x1942194219421942,
- 0x1942194219421942, 0x1942194219421942};
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
- 0x17151412110F0E0C};
- __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
- 0x0F0D0C0A09070604};
- __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
- 0x001600130010000D};
- __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
- 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- reg0 = __lasx_xvld(src_raw, 0);
- reg1 = __lasx_xvld(src_raw, 32);
- reg2 = __lasx_xvld(src_raw, 64);
- src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
- src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
- src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
- tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
- tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
- tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
- tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
- reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_raw += 96;
- }
-}
-
void RAWToUVRow_LASX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@@ -1974,8 +1856,8 @@ void RAWToUVRow_LASX(const uint8_t* src_raw,
__m256i const_38 = __lasx_xvldi(0x413);
__m256i const_94 = __lasx_xvldi(0x42F);
__m256i const_18 = __lasx_xvldi(0x409);
- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
__m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18,
0x15120F0C09060300, 0x00000000001E1B18};
__m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908,
@@ -2118,36 +2000,228 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
}
}
-void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2, src3, dst0;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1;
- __m256i const_128 = __lasx_xvldi(0x480);
- __m256i const_150 = __lasx_xvldi(0x96);
- __m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
- 0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
- __m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
- 0x0000000700000003};
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
+
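Every LASX Y kernel below reduces to the same per-pixel dot product, Y = (c0 * B + c1 * G + c2 * R + kAddY) >> 8, where kAddY folds in the +0.5 rounding and, for BT.601, the +16 offset. A scalar reference with spot checks (assuming the struct and constants above are in scope; full-range white must land on 235 for BT.601 and 255 for JPEG):

    #include <assert.h>
    #include <stdint.h>

    static uint8_t RgbToY(uint8_t b, uint8_t g, uint8_t r,
                          const struct RgbConstants* k) {
      return (uint8_t)((k->kRGBToY[0] * b + k->kRGBToY[1] * g +
                        k->kRGBToY[2] * r + k->kAddY) >> 8);
    }

    int main(void) {
      assert(RgbToY(255, 255, 255, &kRgb24I601Constants) == 235);
      assert(RgbToY(255, 255, 255, &kRgb24JPEGConstants) == 255);
      return 0;
    }
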
+// ARGB expects the first 3 values to contain RGB and the 4th value to be ignored.
+static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr20, %4, 0 \n\t" // load shuff
+ "1: \n\t"
+ "xvld $xr4, %0, 0 \n\t"
+ "xvld $xr5, %0, 32 \n\t"
+ "xvld $xr6, %0, 64 \n\t"
+ "xvld $xr7, %0, 96 \n\t" // load 32 pixels of
+ // ARGB
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpickev.b $xr8, $xr5, $xr4 \n\t" // BR
+ "xvpickev.b $xr10, $xr7, $xr6 \n\t"
+ "xvpickod.b $xr9, $xr5, $xr4 \n\t" // GA
+ "xvpickod.b $xr11, $xr7, $xr6 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" // G
+ "xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
+ "addi.d %0, %0, 128 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvperm.w $xr11, $xr10, $xr20 \n\t"
+ "xvst $xr11, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_argb), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), "r"(shuff)
+ : "memory");
+}
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
- 96, src0, src1, src2, src3);
- tmp0 = __lasx_xvpickev_b(src1, src0);
- tmp1 = __lasx_xvpickod_b(src1, src0);
- tmp2 = __lasx_xvpickev_b(src3, src2);
- tmp3 = __lasx_xvpickod_b(src3, src2);
- reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
- reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- dst0 = __lasx_xvperm_w(dst0, shuff);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_argb += 128;
- }
+void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+
+void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
+}
+
+void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
+// RGBA expects the first value to be A (ignored), then 3 values containing RGB.
+// Same code as ARGB, except each channel is read one byte later.
+static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr20, %4, 0 \n\t" // load shuff
+ "1: \n\t"
+ "xvld $xr4, %0, 0 \n\t"
+ "xvld $xr5, %0, 32 \n\t"
+ "xvld $xr6, %0, 64 \n\t"
+ "xvld $xr7, %0, 96 \n\t" // load 32 pixels of
+ // RGBA
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpickev.b $xr8, $xr5, $xr4 \n\t" // AG
+ "xvpickev.b $xr10, $xr7, $xr6 \n\t"
+ "xvpickod.b $xr9, $xr5, $xr4 \n\t" // BR
+ "xvpickod.b $xr11, $xr7, $xr6 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" // G
+ "xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
+ "addi.d %0, %0, 128 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvperm.w $xr11, $xr10, $xr20 \n\t"
+ "xvst $xr11, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), "r"(shuff)
+ : "memory");
+}
+
+void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+
+void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
+}
+
+static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int8_t shuff[128] = {
+ 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
+ 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
+ 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
+ 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
+ 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
+ 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
+ 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0,
+ 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr4, %4, 0 \n\t" // load shuff
+ "xvld $xr5, %4, 32 \n\t"
+ "xvld $xr6, %4, 64 \n\t"
+ "xvld $xr7, %4, 96 \n\t"
+ "1: \n\t"
+ "xvld $xr8, %0, 0 \n\t"
+ "xvld $xr9, %0, 32 \n\t"
+ "xvld $xr10, %0, 64 \n\t" // load 32 pixels of
+ // RGB
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "xvor.v $xr11, $xr9, $xr9 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpermi.q $xr9, $xr8, 0x30 \n\t" // src0
+ "xvpermi.q $xr8, $xr10, 0x03 \n\t" // src1
+ "xvpermi.q $xr10, $xr11, 0x30 \n\t" // src2
+ "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"
+ "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"
+ "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"
+ "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" // G
+ "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
+ "addi.d %0, %0, 96 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvst $xr10, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), // %3
+ "r"(shuff) // %4
+ : "memory");
+}
+
+void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+
+void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+
+void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
}
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
@@ -2168,8 +2242,8 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb,
__m256i const_21 = __lasx_xvldi(0x415);
__m256i const_53 = __lasx_xvldi(0x435);
__m256i const_10 = __lasx_xvldi(0x40A);
- __m256i const_8080 = {0x8080808080808080, 0x8080808080808080,
- 0x8080808080808080, 0x8080808080808080};
+ __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080,
+ 0x8080808080808080, 0x8080808080808080};
__m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301,
0x1F1D0F0D1B190B09};
diff --git a/files/source/row_lsx.cc b/source/row_lsx.cc
index 3e8b901a..fa088c9e 100644
--- a/files/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -31,6 +31,91 @@ extern "C" {
yb = __lsx_vreplgr2vr_w(yuvconst->kYBiasToRgb[0]); \
}
+// Load 16 pixels of YUV422 data (16 Y, 8 U, 8 V)
+#define READYUV422_D(psrc_y, psrc_u, psrc_v, out_y, uv_l, uv_h) \
+ { \
+ __m128i temp0, temp1; \
+ \
+ DUP2_ARG2(__lsx_vld, psrc_y, 0, psrc_u, 0, out_y, temp0); \
+ temp1 = __lsx_vld(psrc_v, 0); \
+ temp0 = __lsx_vsub_b(temp0, const_80); \
+ temp1 = __lsx_vsub_b(temp1, const_80); \
+ temp0 = __lsx_vsllwil_h_b(temp0, 0); \
+ temp1 = __lsx_vsllwil_h_b(temp1, 0); \
+ uv_l = __lsx_vilvl_h(temp0, temp1); \
+ uv_h = __lsx_vilvh_h(temp0, temp1); \
+ }
+
+// Load 8 pixels of YUV422 data (8 Y, 4 U, 4 V)
+#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, uv) \
+ { \
+ __m128i temp0, temp1; \
+ \
+ out_y = __lsx_vld(psrc_y, 0); \
+ temp0 = __lsx_vldrepl_d(psrc_u, 0); \
+ temp1 = __lsx_vldrepl_d(psrc_v, 0); \
+ uv = __lsx_vilvl_b(temp0, temp1); \
+ uv = __lsx_vsub_b(uv, const_80); \
+ uv = __lsx_vsllwil_h_b(uv, 0); \
+ }
+
+// Convert 16 pixels of YUV to RGB.
+#define YUVTORGB_D(in_y, in_uvl, in_uvh, ubvr, ugvg, yg, yb, b_l, b_h, g_l, \
+ g_h, r_l, r_h) \
+ { \
+ __m128i u_l, u_h, v_l, v_h; \
+ __m128i yl_ev, yl_od, yh_ev, yh_od; \
+ __m128i temp0, temp1, temp2, temp3; \
+ \
+ temp0 = __lsx_vilvl_b(in_y, in_y); \
+ temp1 = __lsx_vilvh_b(in_y, in_y); \
+ yl_ev = __lsx_vmulwev_w_hu_h(temp0, yg); \
+ yl_od = __lsx_vmulwod_w_hu_h(temp0, yg); \
+ yh_ev = __lsx_vmulwev_w_hu_h(temp1, yg); \
+ yh_od = __lsx_vmulwod_w_hu_h(temp1, yg); \
+ DUP4_ARG2(__lsx_vsrai_w, yl_ev, 16, yl_od, 16, yh_ev, 16, yh_od, 16, \
+ yl_ev, yl_od, yh_ev, yh_od); \
+ yl_ev = __lsx_vadd_w(yl_ev, yb); \
+ yl_od = __lsx_vadd_w(yl_od, yb); \
+ yh_ev = __lsx_vadd_w(yh_ev, yb); \
+ yh_od = __lsx_vadd_w(yh_od, yb); \
+ v_l = __lsx_vmulwev_w_h(in_uvl, ubvr); \
+ u_l = __lsx_vmulwod_w_h(in_uvl, ubvr); \
+ v_h = __lsx_vmulwev_w_h(in_uvh, ubvr); \
+ u_h = __lsx_vmulwod_w_h(in_uvh, ubvr); \
+ temp0 = __lsx_vadd_w(yl_ev, u_l); \
+ temp1 = __lsx_vadd_w(yl_od, u_l); \
+ temp2 = __lsx_vadd_w(yh_ev, u_h); \
+ temp3 = __lsx_vadd_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ b_l = __lsx_vpackev_h(temp1, temp0); \
+ b_h = __lsx_vpackev_h(temp3, temp2); \
+ temp0 = __lsx_vadd_w(yl_ev, v_l); \
+ temp1 = __lsx_vadd_w(yl_od, v_l); \
+ temp2 = __lsx_vadd_w(yh_ev, v_h); \
+ temp3 = __lsx_vadd_w(yh_od, v_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ r_l = __lsx_vpackev_h(temp1, temp0); \
+ r_h = __lsx_vpackev_h(temp3, temp2); \
+ DUP2_ARG2(__lsx_vdp2_w_h, in_uvl, ugvg, in_uvh, ugvg, u_l, u_h); \
+ temp0 = __lsx_vsub_w(yl_ev, u_l); \
+ temp1 = __lsx_vsub_w(yl_od, u_l); \
+ temp2 = __lsx_vsub_w(yh_ev, u_h); \
+ temp3 = __lsx_vsub_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ g_l = __lsx_vpackev_h(temp1, temp0); \
+ g_h = __lsx_vpackev_h(temp3, temp2); \
+ }
+
// Convert 8 pixels of YUV420 to RGB.
#define YUVTORGB(in_y, in_vu, vrub, vgug, yg, yb, out_b, out_g, out_r) \
{ \
@@ -118,6 +203,25 @@ extern "C" {
out_g = __lsx_vpackev_h(tmp1, tmp0); \
}
+// Pack and Store 16 ARGB values.
+#define STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, pdst_argb) \
+ { \
+ __m128i temp0, temp1, temp2, temp3; \
+ temp0 = __lsx_vpackev_b(g_l, b_l); \
+ temp1 = __lsx_vpackev_b(a_l, r_l); \
+ temp2 = __lsx_vpackev_b(g_h, b_h); \
+ temp3 = __lsx_vpackev_b(a_h, r_h); \
+ r_l = __lsx_vilvl_h(temp1, temp0); \
+ r_h = __lsx_vilvh_h(temp1, temp0); \
+ g_l = __lsx_vilvl_h(temp3, temp2); \
+ g_h = __lsx_vilvh_h(temp3, temp2); \
+ __lsx_vst(r_l, pdst_argb, 0); \
+ __lsx_vst(r_h, pdst_argb, 16); \
+ __lsx_vst(g_l, pdst_argb, 32); \
+ __lsx_vst(g_h, pdst_argb, 48); \
+ pdst_argb += 64; \
+ }
+
// Pack and Store 8 ARGB values.
#define STOREARGB(in_a, in_r, in_g, in_b, pdst_argb) \
{ \
@@ -155,6 +259,1028 @@ extern "C" {
_dst0 = __lsx_vpickod_b(_reg1, _reg0); \
}
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ int len = width / 32;
+ __m128i src0, src1;
+ __m128i shuffler = {0x08090A0B0C0D0E0F, 0x0001020304050607};
+ src += width - 32;
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
+ DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0,
+ src1);
+ __lsx_vst(src1, dst, 0);
+ __lsx_vst(src0, dst, 16);
+ dst += 32;
+ src -= 32;
+ }
+}
+
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src, dst;
+ __m128i shuffler = {0x0004000500060007, 0x0000000100020003};
+
+ src_uv += (width - 8) << 1;
+ for (x = 0; x < len; x++) {
+ src = __lsx_vld(src_uv, 0);
+ dst = __lsx_vshuf_h(shuffler, src, src);
+ __lsx_vst(dst, dst_uv, 0);
+ src_uv -= 16;
+ dst_uv += 16;
+ }
+}
+
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1;
+ __m128i shuffler = {0x0B0A09080F0E0D0C, 0x0302010007060504};
+
+ src += (width * 4) - 32;
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
+ DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0,
+ src1);
+ __lsx_vst(src1, dst, 0);
+ __lsx_vst(src0, dst, 16);
+ dst += 32;
+ src -= 32;
+ }
+}
+
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src_u0, src_v0, src_y0, vec_uv0;
+ __m128i vec_yuy2_0, vec_yuy2_1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0);
+ src_y0 = __lsx_vld(src_y, 0);
+ vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);
+ vec_yuy2_0 = __lsx_vilvl_b(vec_uv0, src_y0);
+ vec_yuy2_1 = __lsx_vilvh_b(vec_uv0, src_y0);
+ __lsx_vst(vec_yuy2_0, dst_yuy2, 0);
+ __lsx_vst(vec_yuy2_1, dst_yuy2, 16);
+ src_u += 8;
+ src_v += 8;
+ src_y += 16;
+ dst_yuy2 += 32;
+ }
+}
+
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src_u0, src_v0, src_y0, vec_uv0;
+ __m128i vec_uyvy0, vec_uyvy1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0);
+ src_y0 = __lsx_vld(src_y, 0);
+ vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);
+ vec_uyvy0 = __lsx_vilvl_b(src_y0, vec_uv0);
+ vec_uyvy1 = __lsx_vilvh_b(src_y0, vec_uv0);
+ __lsx_vst(vec_uyvy0, dst_uyvy, 0);
+ __lsx_vst(vec_uyvy1, dst_uyvy, 16);
+ src_u += 8;
+ src_v += 8;
+ src_y += 16;
+ dst_uyvy += 32;
+ }
+}
+
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_ug, vec_vr, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i alpha = __lsx_vldi(0xFF);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(alpha, alpha, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i alpha = __lsx_vldi(0xFF);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(r_l, r_h, g_l, g_h, b_l, b_h, alpha, alpha, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ int res = width & 15;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i zero = __lsx_vldi(0);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h, a_l, a_h;
+
+ y = __lsx_vld(src_a, 0);
+ a_l = __lsx_vilvl_b(zero, y);
+ a_h = __lsx_vilvh_b(zero, y);
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ src_a += 16;
+ }
+ if (res) {
+ __m128i y, uv, r, g, b, a;
+ a = __lsx_vld(src_a, 0);
+ a = __lsx_vsllwil_hu_bu(a, 0);
+ READYUV422(src_y, src_u, src_v, y, uv);
+ YUVTORGB(y, uv, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b, g, r);
+ STOREARGB(a, r, g, b, dst_argb);
+ }
+}
+
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int32_t width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i shuffler0 = {0x0504120302100100, 0x0A18090816070614};
+ __m128i shuffler1 = {0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+ __m128i temp0, temp1, temp2, temp3;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ temp0 = __lsx_vpackev_b(g_l, b_l);
+ temp1 = __lsx_vpackev_b(g_h, b_h);
+ DUP4_ARG3(__lsx_vshuf_b, r_l, temp0, shuffler1, r_h, temp1, shuffler1, r_l,
+ temp0, shuffler0, r_h, temp1, shuffler0, temp2, temp3, temp0,
+ temp1);
+
+ b_l = __lsx_vilvl_d(temp1, temp2);
+ b_h = __lsx_vilvh_d(temp3, temp1);
+ __lsx_vst(temp0, dst_argb, 0);
+ __lsx_vst(b_l, dst_argb, 16);
+ __lsx_vst(b_h, dst_argb, 32);
+ dst_argb += 48;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
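+// RGB565 layout per 16-bit pixel: R in bits 15..11, G in bits 10..5, B in
+// bits 4..0.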
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 3);
+ b_h = __lsx_vsrli_h(b_h, 3);
+ g_l = __lsx_vsrli_h(g_l, 2);
+ g_h = __lsx_vsrli_h(g_h, 2);
+ r_l = __lsx_vsrli_h(r_l, 3);
+ r_h = __lsx_vsrli_h(r_h, 3);
+ r_l = __lsx_vslli_h(r_l, 11);
+ r_h = __lsx_vslli_h(r_h, 11);
+ g_l = __lsx_vslli_h(g_l, 5);
+ g_h = __lsx_vslli_h(g_h, 5);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_rgb565, 0);
+ __lsx_vst(r_h, dst_rgb565, 16);
+ dst_rgb565 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
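+// ARGB4444 layout per 16-bit pixel: A in bits 15..12, R in bits 11..8, G in
+// bits 7..4, B in bits 3..0; the 0xF000 constant below supplies an opaque
+// alpha nibble.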
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i alpha = (__m128i)v2u64{0xF000F000F000F000, 0xF000F000F000F000};
+ __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 4);
+ b_h = __lsx_vsrli_h(b_h, 4);
+ r_l = __lsx_vsrli_h(r_l, 4);
+ r_h = __lsx_vsrli_h(r_h, 4);
+ g_l = __lsx_vand_v(g_l, mask);
+ g_h = __lsx_vand_v(g_h, mask);
+ r_l = __lsx_vslli_h(r_l, 8);
+ r_h = __lsx_vslli_h(r_h, 8);
+ r_l = __lsx_vor_v(r_l, alpha);
+ r_h = __lsx_vor_v(r_h, alpha);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_argb4444, 0);
+ __lsx_vst(r_h, dst_argb4444, 16);
+ dst_argb4444 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
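+// ARGB1555 layout per 16-bit pixel: A in bit 15, R in bits 14..10, G in bits
+// 9..5, B in bits 4..0.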
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i alpha = (__m128i)v2u64{0x8000800080008000, 0x8000800080008000};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 3);
+ b_h = __lsx_vsrli_h(b_h, 3);
+ g_l = __lsx_vsrli_h(g_l, 3);
+ g_h = __lsx_vsrli_h(g_h, 3);
+ g_l = __lsx_vslli_h(g_l, 5);
+ g_h = __lsx_vslli_h(g_h, 5);
+ r_l = __lsx_vsrli_h(r_l, 3);
+ r_h = __lsx_vsrli_h(r_h, 3);
+ r_l = __lsx_vslli_h(r_l, 10);
+ r_h = __lsx_vslli_h(r_h, 10);
+ r_l = __lsx_vor_v(r_l, alpha);
+ r_h = __lsx_vor_v(r_h, alpha);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_argb1555, 0);
+ __lsx_vst(r_h, dst_argb1555, 16);
+ dst_argb1555 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1);
+ dst0 = __lsx_vpickev_b(src1, src0);
+ __lsx_vst(dst0, dst_y, 0);
+ src_yuy2 += 32;
+ dst_y += 16;
+ }
+}
+
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2;
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src_yuy2_next, 0,
+ src_yuy2_next, 16, src0, src1, src2, src3);
+ src0 = __lsx_vpickod_b(src1, src0);
+ src1 = __lsx_vpickod_b(src3, src2);
+ tmp0 = __lsx_vavgr_bu(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_yuy2 += 32;
+ src_yuy2_next += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1);
+ tmp0 = __lsx_vpickod_b(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_yuy2 += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1);
+ dst0 = __lsx_vpickod_b(src1, src0);
+ __lsx_vst(dst0, dst_y, 0);
+ src_uyvy += 32;
+ dst_y += 16;
+ }
+}
+
+void UYVYToUVRow_LSX(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy;
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src_uyvy_next, 0,
+ src_uyvy_next, 16, src0, src1, src2, src3);
+ src0 = __lsx_vpickev_b(src1, src0);
+ src1 = __lsx_vpickev_b(src3, src2);
+ tmp0 = __lsx_vavgr_bu(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_uyvy += 32;
+ src_uyvy_next += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void UYVYToUV422Row_LSX(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_uyvy += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
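+// Subsamples a 2x2 box of ARGB to one U and one V sample. The constant names
+// give the nominal BT.601 weights (0x70 = 112, 0x4A = 74, 0x26 = 38,
+// 0x5E = 94, 0x12 = 18); the stored halfwords are half those values because
+// the box filter below sums two rows but only averages the horizontal pairs,
+// leaving samples at twice scale ahead of the final >> 8.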
+void ARGBToUVRow_LSX(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ const uint8_t* src_argb1 = src_argb0 + src_stride_argb;
+
+ __m128i src0, src1, src2, src3, src4, src5, src6, src7;
+ __m128i vec0, vec1, vec2, vec3;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, dst0, dst1;
+ __m128i const_0x70 = {0x0038003800380038, 0x0038003800380038};
+ __m128i const_0x4A = {0x0025002500250025, 0x0025002500250025};
+ __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013};
+ __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f};
+ __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009};
+ __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0,
+ 48, src0, src1, src2, src3);
+ DUP4_ARG2(__lsx_vld, src_argb1, 0, src_argb1, 16, src_argb1, 32, src_argb1,
+ 48, src4, src5, src6, src7);
+ vec0 = __lsx_vaddwev_h_bu(src0, src4);
+ vec1 = __lsx_vaddwev_h_bu(src1, src5);
+ vec2 = __lsx_vaddwev_h_bu(src2, src6);
+ vec3 = __lsx_vaddwev_h_bu(src3, src7);
+ tmp0 = __lsx_vpickev_h(vec1, vec0);
+ tmp1 = __lsx_vpickev_h(vec3, vec2);
+ tmp2 = __lsx_vpickod_h(vec1, vec0);
+ tmp3 = __lsx_vpickod_h(vec3, vec2);
+ vec0 = __lsx_vaddwod_h_bu(src0, src4);
+ vec1 = __lsx_vaddwod_h_bu(src1, src5);
+ vec2 = __lsx_vaddwod_h_bu(src2, src6);
+ vec3 = __lsx_vaddwod_h_bu(src3, src7);
+ tmp4 = __lsx_vpickev_h(vec1, vec0);
+ tmp5 = __lsx_vpickev_h(vec3, vec2);
+ vec0 = __lsx_vpickev_h(tmp1, tmp0);
+ vec1 = __lsx_vpickod_h(tmp1, tmp0);
+ src0 = __lsx_vavgr_h(vec0, vec1);
+ vec0 = __lsx_vpickev_h(tmp3, tmp2);
+ vec1 = __lsx_vpickod_h(tmp3, tmp2);
+ src1 = __lsx_vavgr_h(vec0, vec1);
+ vec0 = __lsx_vpickev_h(tmp5, tmp4);
+ vec1 = __lsx_vpickod_h(tmp5, tmp4);
+ src2 = __lsx_vavgr_h(vec0, vec1);
+ dst0 = __lsx_vmadd_h(const_0x8080, src0, const_0x70);
+ dst0 = __lsx_vmsub_h(dst0, src2, const_0x4A);
+ dst0 = __lsx_vmsub_h(dst0, src1, const_0x26);
+ dst1 = __lsx_vmadd_h(const_0x8080, src1, const_0x70);
+ dst1 = __lsx_vmsub_h(dst1, src2, const_0x5E);
+ dst1 = __lsx_vmsub_h(dst1, src0, const_0x12);
+ dst0 = __lsx_vsrai_h(dst0, 8);
+ dst1 = __lsx_vsrai_h(dst1, 8);
+ dst0 = __lsx_vpickev_b(dst1, dst0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst0, dst_v, 0, 1);
+ src_argb0 += 64;
+ src_argb1 += 64;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = (width / 16) - 1;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i shuf = {0x0908060504020100, 0x000000000E0D0C0A};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ __lsx_vst(tmp3, dst_rgb, 36);
+ dst_rgb += 48;
+ src_argb += 64;
+ }
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ dst_rgb += 36;
+ __lsx_vst(tmp3, dst_rgb, 0);
+}
+
+void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = (width / 16) - 1;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i shuf = {0x090A040506000102, 0x000000000C0D0E08};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ __lsx_vst(tmp3, dst_rgb, 36);
+ dst_rgb += 48;
+ src_argb += 64;
+ }
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ dst_rgb += 36;
+ __lsx_vst(tmp3, dst_rgb, 0);
+}
+
+void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, tmp0, tmp1, dst0;
+ __m128i shift = {0x0300030003000300, 0x0300030003000300};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp0 = __lsx_vsrli_b(tmp0, 3);
+ tmp1 = __lsx_vpackev_b(zero, tmp1);
+ tmp1 = __lsx_vsrli_h(tmp1, 2);
+ tmp0 = __lsx_vsll_b(tmp0, shift);
+ tmp1 = __lsx_vslli_h(tmp1, 5);
+ dst0 = __lsx_vor_v(tmp0, tmp1);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
+void ARGBToARGB1555Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, tmp0, tmp1, tmp2, tmp3, dst0;
+ __m128i shift1 = {0x0703070307030703, 0x0703070307030703};
+ __m128i shift2 = {0x0200020002000200, 0x0200020002000200};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp0 = __lsx_vsrli_b(tmp0, 3);
+ tmp1 = __lsx_vsrl_b(tmp1, shift1);
+ tmp0 = __lsx_vsll_b(tmp0, shift2);
+ tmp2 = __lsx_vpackev_b(zero, tmp1);
+ tmp3 = __lsx_vpackod_b(zero, tmp1);
+ tmp2 = __lsx_vslli_h(tmp2, 5);
+ tmp3 = __lsx_vslli_h(tmp3, 15);
+ dst0 = __lsx_vor_v(tmp0, tmp2);
+ dst0 = __lsx_vor_v(dst0, tmp3);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
+void ARGBToARGB4444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp1 = __lsx_vandi_b(tmp1, 0xF0);
+ tmp0 = __lsx_vsrli_b(tmp0, 4);
+ dst0 = __lsx_vor_v(tmp1, tmp0);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
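+// 4:4:4 variant: every pixel yields a U and a V sample, so no box filter is
+// applied and the full-scale weights (112, 74, 38, 94, 18) are used directly
+// with the 0x8080 bias (128.5 in 8.8 fixed point).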
+void ARGBToUV444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int32_t width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i reg0, reg1, reg2, reg3, dst0, dst1;
+ __m128i const_112 = __lsx_vldi(112);
+ __m128i const_74 = __lsx_vldi(74);
+ __m128i const_38 = __lsx_vldi(38);
+ __m128i const_94 = __lsx_vldi(94);
+ __m128i const_18 = __lsx_vldi(18);
+ __m128i const_0x8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vpickev_h(src1, src0);
+ tmp1 = __lsx_vpickod_h(src1, src0);
+ tmp2 = __lsx_vpickev_h(src3, src2);
+ tmp3 = __lsx_vpickod_h(src3, src2);
+ reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp0, const_112);
+ reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp2, const_112);
+ reg2 = __lsx_vmulwod_h_bu(tmp0, const_74);
+ reg3 = __lsx_vmulwod_h_bu(tmp2, const_74);
+ reg2 = __lsx_vmaddwev_h_bu(reg2, tmp1, const_38);
+ reg3 = __lsx_vmaddwev_h_bu(reg3, tmp3, const_38);
+ reg0 = __lsx_vsub_h(reg0, reg2);
+ reg1 = __lsx_vsub_h(reg1, reg3);
+ reg0 = __lsx_vsrai_h(reg0, 8);
+ reg1 = __lsx_vsrai_h(reg1, 8);
+ dst0 = __lsx_vpickev_b(reg1, reg0);
+
+ reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp1, const_112);
+ reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp3, const_112);
+ reg2 = __lsx_vmulwev_h_bu(tmp0, const_18);
+ reg3 = __lsx_vmulwev_h_bu(tmp2, const_18);
+ reg2 = __lsx_vmaddwod_h_bu(reg2, tmp0, const_94);
+ reg3 = __lsx_vmaddwod_h_bu(reg3, tmp2, const_94);
+ reg0 = __lsx_vsub_h(reg0, reg2);
+ reg1 = __lsx_vsub_h(reg1, reg3);
+ reg0 = __lsx_vsrai_h(reg0, 8);
+ reg1 = __lsx_vsrai_h(reg1, 8);
+ dst1 = __lsx_vpickev_b(reg1, reg0);
+
+ __lsx_vst(dst0, dst_u, 0);
+ __lsx_vst(dst1, dst_v, 0);
+ dst_u += 16;
+ dst_v += 16;
+ src_argb += 64;
+ }
+}
+
+void ARGBMultiplyRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, dst0, dst1;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ tmp0 = __lsx_vilvl_b(src0, src0);
+ tmp1 = __lsx_vilvh_b(src0, src0);
+ tmp2 = __lsx_vilvl_b(zero, src1);
+ tmp3 = __lsx_vilvh_b(zero, src1);
+ dst0 = __lsx_vmuh_hu(tmp0, tmp2);
+ dst1 = __lsx_vmuh_hu(tmp1, tmp3);
+ dst0 = __lsx_vpickev_b(dst1, dst0);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBAddRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ dst0 = __lsx_vsadd_bu(src0, src1);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBSubtractRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ dst0 = __lsx_vssub_bu(src0, src1);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
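+// Attenuation approximates c * a / 255 per channel: each byte is duplicated
+// into a 16-bit lane (x * 0x101), the widened values are multiplied, and the
+// product is narrowed with >> 24, which is within one step of a true divide
+// by 255.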
+void ARGBAttenuateRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, reg2, reg3, reg4, reg5;
+ __m128i b, g, r, a, dst0, dst1;
+ __m128i control = {0x0005000100040000, 0x0007000300060002};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ b = __lsx_vpackev_b(tmp0, tmp0);
+ r = __lsx_vpackod_b(tmp0, tmp0);
+ g = __lsx_vpackev_b(tmp1, tmp1);
+ a = __lsx_vpackod_b(tmp1, tmp1);
+ reg0 = __lsx_vmulwev_w_hu(b, a);
+ reg1 = __lsx_vmulwod_w_hu(b, a);
+ reg2 = __lsx_vmulwev_w_hu(r, a);
+ reg3 = __lsx_vmulwod_w_hu(r, a);
+ reg4 = __lsx_vmulwev_w_hu(g, a);
+ reg5 = __lsx_vmulwod_w_hu(g, a);
+ reg0 = __lsx_vssrani_h_w(reg1, reg0, 24);
+ reg2 = __lsx_vssrani_h_w(reg3, reg2, 24);
+ reg4 = __lsx_vssrani_h_w(reg5, reg4, 24);
+ reg0 = __lsx_vshuf_h(control, reg0, reg0);
+ reg2 = __lsx_vshuf_h(control, reg2, reg2);
+ reg4 = __lsx_vshuf_h(control, reg4, reg4);
+ tmp0 = __lsx_vpackev_b(reg4, reg0);
+ tmp1 = __lsx_vpackev_b(a, reg2);
+ dst0 = __lsx_vilvl_h(tmp1, tmp0);
+ dst1 = __lsx_vilvh_h(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ dst_argb += 32;
+ src_argb += 32;
+ }
+}
+
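+// dither4 packs four per-pixel dither bytes; they are replicated across the
+// row, widened to 16 bits, added to each channel, and clipped before the 565
+// truncation.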
+void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1, dst0;
+ __m128i b, g, r;
+ __m128i zero = __lsx_vldi(0);
+ __m128i vec_dither = __lsx_vldrepl_w(&dither4, 0);
+
+ vec_dither = __lsx_vilvl_b(zero, vec_dither);
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ b = __lsx_vpackev_b(zero, tmp0);
+ r = __lsx_vpackod_b(zero, tmp0);
+ g = __lsx_vpackev_b(zero, tmp1);
+ b = __lsx_vadd_h(b, vec_dither);
+ g = __lsx_vadd_h(g, vec_dither);
+ r = __lsx_vadd_h(r, vec_dither);
+ DUP2_ARG1(__lsx_vclip255_h, b, g, b, g);
+ r = __lsx_vclip255_h(r);
+ b = __lsx_vsrai_h(b, 3);
+ g = __lsx_vsrai_h(g, 2);
+ r = __lsx_vsrai_h(r, 3);
+ g = __lsx_vslli_h(g, 5);
+ r = __lsx_vslli_h(r, 11);
+ dst0 = __lsx_vor_v(b, g);
+ dst0 = __lsx_vor_v(dst0, r);
+ __lsx_vst(dst0, dst_rgb, 0);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBShuffleRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, dst0, dst1;
+ __m128i shuf = {0x0404040400000000, 0x0C0C0C0C08080808};
+ __m128i temp = __lsx_vldrepl_w(shuffler, 0);
+
+ shuf = __lsx_vadd_b(shuf, temp);
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ dst0 = __lsx_vshuf_b(src0, src0, shuf);
+ dst1 = __lsx_vshuf_b(src1, src1, shuf);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBShadeRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ int x;
+ int len = width / 4;
+ __m128i src0, dst0, tmp0, tmp1;
+ __m128i vec_value = __lsx_vreplgr2vr_w(value);
+
+ vec_value = __lsx_vilvl_b(vec_value, vec_value);
+ for (x = 0; x < len; x++) {
+ src0 = __lsx_vld(src_argb, 0);
+ tmp0 = __lsx_vilvl_b(src0, src0);
+ tmp1 = __lsx_vilvh_b(src0, src0);
+ tmp0 = __lsx_vmuh_hu(tmp0, vec_value);
+ tmp1 = __lsx_vmuh_hu(tmp1, vec_value);
+ dst0 = __lsx_vpickod_b(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, reg2, dst0, dst1;
+ __m128i const_128 = __lsx_vldi(0x480);
+ __m128i const_150 = __lsx_vldi(0x96);
+ __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ reg0 = __lsx_vdp2_h_bu(tmp0, const_br);
+ reg1 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
+ reg2 = __lsx_vadd_h(reg0, reg1);
+ tmp0 = __lsx_vpackod_b(reg2, reg2);
+ tmp1 = __lsx_vpackod_b(tmp1, reg2);
+ dst0 = __lsx_vilvl_h(tmp1, tmp0);
+ dst1 = __lsx_vilvh_h(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, spb, spg, spr;
+ __m128i dst0, dst1;
+ __m128i spb_g = __lsx_vldi(68);
+ __m128i spg_g = __lsx_vldi(88);
+ __m128i spr_g = __lsx_vldi(98);
+ __m128i spb_br = {0x2311231123112311, 0x2311231123112311};
+ __m128i spg_br = {0x2D162D162D162D16, 0x2D162D162D162D16};
+ __m128i spr_br = {0x3218321832183218, 0x3218321832183218};
+ __m128i shuff = {0x1706150413021100, 0x1F0E1D0C1B0A1908};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ DUP2_ARG2(__lsx_vdp2_h_bu, tmp0, spb_br, tmp0, spg_br, spb, spg);
+ spr = __lsx_vdp2_h_bu(tmp0, spr_br);
+ spb = __lsx_vmaddwev_h_bu(spb, tmp1, spb_g);
+ spg = __lsx_vmaddwev_h_bu(spg, tmp1, spg_g);
+ spr = __lsx_vmaddwev_h_bu(spr, tmp1, spr_g);
+ spb = __lsx_vsrli_h(spb, 7);
+ spg = __lsx_vsrli_h(spg, 7);
+ spr = __lsx_vsrli_h(spr, 7);
+ spg = __lsx_vsat_hu(spg, 7);
+ spr = __lsx_vsat_hu(spr, 7);
+ reg0 = __lsx_vpackev_b(spg, spb);
+ reg1 = __lsx_vshuf_b(tmp1, spr, shuff);
+ dst0 = __lsx_vilvl_h(reg1, reg0);
+ dst1 = __lsx_vilvh_h(reg1, reg0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ dst_argb += 32;
+ }
+}
+
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width) {
@@ -407,7 +1533,7 @@ void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_argb1555, 0, src_argb1555, 16, next_argb1555, 0,
@@ -516,7 +1642,7 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_rgb565, 0, src_rgb565, 16, next_rgb565, 0,
@@ -561,39 +1687,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
}
}
-void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1, dst0;
- __m128i const_129 = __lsx_vldi(129);
- __m128i const_br = {0x4219421942194219, 0x4219421942194219};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
- __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
- __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
- __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- src0 = __lsx_vld(src_rgb24, 0);
- src1 = __lsx_vld(src_rgb24, 16);
- src2 = __lsx_vld(src_rgb24, 32);
- tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
- tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
- tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
- tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lsx_vpickod_b(reg1, reg0);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_rgb24 += 48;
- }
-}
-
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -610,7 +1703,7 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
__m128i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18};
__m128i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908};
__m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -647,39 +1740,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
}
}
-void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1, dst0;
- __m128i const_129 = __lsx_vldi(129);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
- __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
- __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
- __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- src0 = __lsx_vld(src_raw, 0);
- src1 = __lsx_vld(src_raw, 16);
- src2 = __lsx_vld(src_raw, 32);
- tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
- tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
- tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
- tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_raw += 48;
- }
-}
-
void RAWToUVRow_LSX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@@ -696,7 +1756,7 @@ void RAWToUVRow_LSX(const uint8_t* src_raw,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
__m128i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18};
__m128i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908};
__m128i shuff0_g = {0x1613100D0A070401, 0x00000000001F1C19};
@@ -914,62 +1974,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
}
}
-void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_128 = __lsx_vldi(0x480);
- __m128i const_150 = __lsx_vldi(0x96);
- __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickev_b(src1, src0);
- tmp1 = __lsx_vpickod_b(src1, src0);
- tmp2 = __lsx_vpickev_b(src3, src2);
- tmp3 = __lsx_vpickod_b(src3, src2);
- reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
- reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vpickod_b(reg1, reg0);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_argb += 64;
- }
-}
-
-void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickod_b(src1, src0);
- tmp1 = __lsx_vpickev_b(src1, src0);
- tmp2 = __lsx_vpickod_b(src3, src2);
- tmp3 = __lsx_vpickev_b(src3, src2);
- reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_bgra += 64;
- }
-}
-
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -987,7 +1991,7 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
@@ -1018,34 +2022,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
}
}
-void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickev_b(src1, src0);
- tmp1 = __lsx_vpickod_b(src1, src0);
- tmp2 = __lsx_vpickev_b(src3, src2);
- tmp3 = __lsx_vpickod_b(src3, src2);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_abgr += 64;
- }
-}
-
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
@@ -1063,7 +2039,7 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
@@ -1094,34 +2070,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
}
}
-void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x4219421942194219, 0x4219421942194219};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickod_b(src1, src0);
- tmp1 = __lsx_vpickev_b(src1, src0);
- tmp2 = __lsx_vpickod_b(src3, src2);
- tmp3 = __lsx_vpickev_b(src3, src2);
- reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_rgba += 64;
- }
-}
-
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
@@ -1139,7 +2087,7 @@ void RGBAToUVRow_LSX(const uint8_t* src_rgba,
__m128i const_38 = __lsx_vldi(0x413);
__m128i const_94 = __lsx_vldi(0x42F);
__m128i const_18 = __lsx_vldi(0x409);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
@@ -1188,7 +2136,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
__m128i const_21 = __lsx_vldi(0x415);
__m128i const_53 = __lsx_vldi(0x435);
__m128i const_10 = __lsx_vldi(0x40A);
- __m128i const_8080 = {0x8080808080808080, 0x8080808080808080};
+ __m128i const_8080 = (__m128i)v2u64{0x8080808080808080, 0x8080808080808080};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
@@ -1566,7 +2514,7 @@ void ARGBBlendRow_LSX(const uint8_t* src_argb,
__m128i const_256 = __lsx_vldi(0x500);
__m128i zero = __lsx_vldi(0);
__m128i alpha = __lsx_vldi(0xFF);
- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+ __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb1, 0, src_argb1, 16,
@@ -1612,7 +2560,7 @@ void ARGBQuantizeRow_LSX(uint8_t* dst_argb,
__m128i vec_offset = __lsx_vreplgr2vr_b(interval_offset);
__m128i vec_scale = __lsx_vreplgr2vr_w(scale);
__m128i zero = __lsx_vldi(0);
- __m128i control = {0xFF000000FF000000, 0xFF000000FF000000};
+ __m128i control = (__m128i)v2u64{0xFF000000FF000000, 0xFF000000FF000000};
for (x = 0; x < len; x++) {
DUP4_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, dst_argb, 32, dst_argb, 48,
@@ -1821,6 +2769,216 @@ void HalfFloatRow_LSX(const uint16_t* src,
}
}
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
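+// The three weights sum to 256, so taking the high byte of the 16-bit
+// accumulator in the row functions below is an effective >> 8 back to
+// full-range 8-bit Y.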
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
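+// (The BT.601 weights sum to 220 and kAddY = 0x1080 is 16.5 in 8.8 fixed
+// point, yielding studio-swing Y in roughly the 16..235 range.)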
+
+// ARGB expects the first 3 values to contain RGB; the 4th value is ignored.
+static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "1: \n\t"
+ "vld $vr4, %0, 0 \n\t"
+ "vld $vr5, %0, 16 \n\t"
+ "vld $vr6, %0, 32 \n\t"
+ "vld $vr7, %0, 48 \n\t" // load 16 pixels of
+ // ARGB
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vpickev.b $vr8, $vr5, $vr4 \n\t" // BR
+ "vpickev.b $vr10, $vr7, $vr6 \n\t"
+ "vpickod.b $vr9, $vr5, $vr4 \n\t" // GA
+ "vpickod.b $vr11, $vr7, $vr6 \n\t"
+ "vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
+ "vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" // G
+ "vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
+ "vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
+ "addi.d %0, %0, 64 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_argb), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants)
+ : "memory");
+}
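+// The loop above consumes 16 pixels per iteration and exits only when width
+// reaches zero, so callers are expected to pass a multiple of 16; remainders
+// are normally handled by the C or ANY row variants.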
+
+void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+
+void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
+}
+
+void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
+// RGBA expects the first value to be A (ignored), followed by 3 values
+// containing RGB.
+// Same code as ARGB, except the channels are picked from different byte
+// lanes.
+static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "1: \n\t"
+ "vld $vr4, %0, 0 \n\t"
+ "vld $vr5, %0, 16 \n\t"
+ "vld $vr6, %0, 32 \n\t"
+ "vld $vr7, %0, 48 \n\t" // load 16 pixels of
+ // RGBA
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vpickev.b $vr8, $vr5, $vr4 \n\t" // AG
+ "vpickev.b $vr10, $vr7, $vr6 \n\t"
+ "vpickod.b $vr9, $vr5, $vr4 \n\t" // BR
+ "vpickod.b $vr11, $vr7, $vr6 \n\t"
+ "vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
+ "vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" // G
+ "vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
+ "vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
+ "addi.d %0, %0, 64 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants)
+ : "memory");
+}
+
+void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+
+void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
+}
+
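+// The shuffle tables below gather the 48 packed RGB24 bytes into B/R pairs
+// (even/odd byte lanes) and G-only lanes, so the same multiply-accumulate
+// pattern as the ARGB path can be reused on 24-bit input.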
+static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18,
+ 20, 21, 23, 24, 26, 27, 29, 30, 0, 1, 3, 4, 6,
+ 7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
+ 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
+ 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "vld $vr4, %4, 0 \n\t" // load shuff
+ "vld $vr5, %4, 16 \n\t"
+ "vld $vr6, %4, 32 \n\t"
+ "vld $vr7, %4, 48 \n\t"
+ "1: \n\t"
+ "vld $vr8, %0, 0 \n\t"
+ "vld $vr9, %0, 16 \n\t"
+ "vld $vr10, %0, 32 \n\t" // load 16 pixels of
+ // RGB
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"
+ "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"
+ "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"
+ "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"
+ "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" // G
+ "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
+ "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
+ "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
+ "addi.d %0, %0, 48 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), // %3
+ "r"(shuff) // %4
+ : "memory");
+}
+
+void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+
+void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+
+void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_msa.cc b/source/row_msa.cc
index b7d5bb5e..b7d5bb5e 100644
--- a/files/source/row_msa.cc
+++ b/source/row_msa.cc
diff --git a/files/source/row_neon.cc b/source/row_neon.cc
index 804ff839..31142a90 100644
--- a/files/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -89,12 +89,14 @@ extern "C" {
"vsli.u16 d2, d2, #8 \n" \
"vsri.u16 d3, d3, #8 \n"
+// TODO: Use single register for kUVCoeff and multiply by lane
#define YUVTORGB_SETUP \
+ "vld1.16 {d31}, [%[kRGBCoeffBias]] \n" \
"vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \
- "vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n"
+ "vdup.u16 q10, d31[1] \n" \
+ "vdup.u16 q11, d31[2] \n" \
+ "vdup.u16 q12, d31[3] \n" \
+ "vdup.u16 d31, d31[0] \n"
// q0: B uint16x8_t
// q1: G uint16x8_t
@@ -156,6 +158,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
: "cc", "memory", YUVTORGB_REGS, "d6");
}
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV444 YUVTORGB
+ RGBTORGB8
+ "subs %[width], %[width], #8 \n"
+ "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
+ "bgt 1b \n"
+ : [src_y] "+r"(src_y), // %[src_y]
+ [src_u] "+r"(src_u), // %[src_u]
+ [src_v] "+r"(src_v), // %[src_v]
+      [dst_rgb24] "+r"(dst_rgb24),  // %[dst_rgb24]
+ [width] "+r"(width) // %[width]
+ : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
+ [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
+ : "cc", "memory", YUVTORGB_REGS);
+}
+
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -586,10 +611,10 @@ void DetileRow_NEON(const uint8_t* src,
int width) {
asm volatile(
"1: \n"
- "vld1.16 {q0}, [%0], %3 \n" // load 16 bytes
+ "vld1.8 {q0}, [%0], %3 \n" // load 16 bytes
"subs %2, %2, #16 \n" // 16 processed per loop
- "pld [%0, 1792] \n"
- "vst1.16 {q0}, [%1]! \n" // store 16 bytes
+ "pld [%0, #1792] \n"
+ "vst1.8 {q0}, [%1]! \n" // store 16 bytes
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
@@ -599,6 +624,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 byte Y's of 16 bits from tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "pld [%0, #3584] \n"
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
@@ -609,7 +654,7 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
"1: \n"
"vld2.8 {d0, d1}, [%0], %4 \n"
"subs %3, %3, #16 \n"
- "pld [%0, 1792] \n"
+ "pld [%0, #1792] \n"
"vst1.8 {d0}, [%1]! \n"
"vst1.8 {d1}, [%2]! \n"
"bgt 1b \n"
@@ -622,6 +667,101 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
);
}
+#if LIBYUV_USE_ST2
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
+ "pld [%0, #1792] \n"
+ "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
+ "pld [%1, #1792] \n"
+ "subs %3, %3, #16 \n"
+ "vst2.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber list
+ );
+}
+#else
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
+ "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
+ "subs %3, %3, #16 \n"
+ "pld [%0, #1792] \n"
+ "vzip.8 q0, q1 \n"
+ "pld [%1, #1792] \n"
+ "vst1.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber list
+ );
+}
+#endif
+
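+// MT2T stores 64 10-bit pixels in 80 bytes: 16 bytes holding four 2-bit
+// fragments each, followed by 64 bytes of upper 8 bits; each iteration
+// expands one such block into 64 16-bit pixels.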
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q14}, [%0]! \n" // Load lower bits.
+ "vld1.8 {q9}, [%0]! \n" // Load upper bits row
+ // by row.
+ "vld1.8 {q11}, [%0]! \n"
+ "vld1.8 {q13}, [%0]! \n"
+ "vld1.8 {q15}, [%0]! \n"
+      "vshl.u8     q14, q14, #6             \n"  // Shift 2-bit fragments to
+      // the top of each byte.
+ "vshl.u8 q10, q14, #4 \n"
+ "vshl.u8 q12, q14, #2 \n"
+ "vzip.u8 q8, q9 \n" // Interleave upper and
+ // lower bits.
+ "vzip.u8 q10, q11 \n"
+ "vzip.u8 q12, q13 \n"
+ "vzip.u8 q14, q15 \n"
+ "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits
+ // into lower 6 bits for
+ // better accuracy in
+ // conversions.
+ "vsri.u16 q9, q9, #10 \n"
+ "vsri.u16 q10, q10, #10 \n"
+ "vsri.u16 q11, q11, #10 \n"
+ "vsri.u16 q12, q12, #10 \n"
+ "vsri.u16 q13, q13, #10 \n"
+ "vsri.u16 q14, q14, #10 \n"
+ "vsri.u16 q15, q15, #10 \n"
+ "vstmia %1!, {q8-q15} \n" // Store pixel block (64
+ // pixels).
+ "subs %2, %2, #80 \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(size) // %2
+ :
+ : "cc", "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@@ -664,7 +804,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(dst_b), // %3
"+r"(width) // %4
: // Input registers
- : "cc", "memory", "d0", "d1", "d2" // Clobber List
+ : "cc", "memory", "q0", "q1", "q2" // Clobber List
);
}
@@ -1505,6 +1645,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
);
}
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // stride + src_yuy2
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
+ "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "vld2.8 {q2, q3}, [%1]! \n" // load next row YUY2.
+ "vrhadd.u8 q4, q1, q3 \n" // average rows of UV
+ "vst1.8 {q4}, [%2]! \n" // store 8 UV.
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(stride_yuy2), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
+ "d7" // Clobber List
+ );
+}
+
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
@@ -1590,7 +1753,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"vdup.32 d7, %2 \n" // dither4
@@ -1664,19 +1827,27 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
);
}
+struct RgbUVConstants {
+ uint8_t kRGBToU[4];
+ uint8_t kRGBToV[4];
+};
+
// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- asm volatile(
- "vmov.u8 d24, #112 \n" // UB / VR 0.875
- // coefficient
- "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
- "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient
- "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
- "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
+void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width,
+ const struct RgbUVConstants* rgbuvconstants) {
+ asm volatile(
+
+ "vld1.8 {d0}, [%4] \n" // load rgbuvconstants
+ "vdup.u8 d24, d0[0] \n" // UB 0.875 coefficient
+ "vdup.u8 d25, d0[1] \n" // UG -0.5781 coefficient
+ "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient
+ "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient
+ "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
+
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
@@ -1694,15 +1865,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
"bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"(rgbuvconstants) // %4
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14",
"q15");
}
+// RGB to bt601 coefficients
+// UB 0.875 coefficient = 112
+// UG -0.5781 coefficient = 74
+// UR -0.2969 coefficient = 38
+// VB -0.1406 coefficient = 18
+// VG -0.7344 coefficient = 94
+// VR 0.875 coefficient = 112 (ignored)
+
+static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
+ {18, 94, 112, 0}};
+
+// RGB to JPeg coefficients
+// UB coeff 0.500 = 127
+// UG coeff -0.33126 = 84
+// UR coeff -0.16874 = 43
+// VB coeff -0.08131 = 20
+// VG coeff -0.41869 = 107
+// VR coeff 0.500 = 127 (ignored)
+
+static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
+ {20, 107, 127, 0}};
+
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+ &kRgb24I601UVConstants);
+}
+
+void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+ &kRgb24JPegUVConstants);
+}
+
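For reference, the scalar arithmetic these byte coefficients drive (a sketch assuming libyuv's usual fixed-point form, where the 0x8080 bias supplies the +128 chroma offset plus rounding):

#include <stdint.h>

// BT.601 U/V from one RGB pixel; the JPEG variant substitutes the
// 127/84/43 and 20/107 constants listed above.
static uint8_t RGBToU601(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static uint8_t RGBToV601(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}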
// clang-format off
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#define RGBTOUV(QB, QG, QR) \
@@ -1762,7 +1971,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
-// TODO(fbarchard): Subsample match C code.
+// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1808,6 +2017,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width) {
+ asm volatile (
+      "add         %1, %0, %1                    \n"  // src_stride + src_abgr
+ "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
+ "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
+ "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
+ "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
+ "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
+ "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
+ "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 16 processed per loop.
+ RGBTOUV(q2, q1, q0)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_stride_abgr), // %1
+ "+r"(dst_uj), // %2
+ "+r"(dst_vj), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
// TODO(fbarchard): Subsample match C code.
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
@@ -2494,7 +2748,6 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
- uint16_t pad;
};
// RGB to JPeg coefficients
@@ -2502,11 +2755,9 @@ struct RgbConstants {
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
- 128,
- 0};
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
@@ -2515,12 +2766,9 @@ static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
- 0x1080,
- 0};
+ 0x1080};
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
- 0x1080,
- 0};
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
@@ -2567,6 +2815,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
@@ -2846,6 +3098,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
+ "vmov.u16 q15, #0x00ff \n" // 255 for rounding up
+
// Attenuate 8 pixels.
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
@@ -2853,16 +3107,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
"vmull.u8 q10, d0, d3 \n" // b * a
"vmull.u8 q11, d1, d3 \n" // g * a
"vmull.u8 q12, d2, d3 \n" // r * a
- "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8
- "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8
- "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8
+ "vaddhn.u16 d0, q10, q15 \n" // (b + 255) >> 8
+ "vaddhn.u16 d1, q11, q15 \n" // (g + 255) >> 8
+ "vaddhn.u16 d2, q12, q15 \n" // (r + 255) >> 8
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q10", "q11", "q12");
+ : "cc", "memory", "q0", "q1", "q10", "q11", "q12", "q15");
}
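The attenuate kernel swaps the rounding narrowing shift for an add-high-half against a 255 bias, changing per-channel rounding from (x*a + 128) >> 8 to (x*a + 255) >> 8; the AArch64 version below gets the same change. In scalar form (the motivation comment is an inference, not stated in the patch):

#include <stdint.h>

static uint8_t Attenuate(uint8_t x, uint8_t a) {
  // (x * a + 255) >> 8 keeps full-alpha pixels exact: x * 255 + 255 ==
  // 256 * x + (255 - x), so Attenuate(x, 255) == x for all x.
  return (uint8_t)((x * a + 255) >> 8);
}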
// Quantize 8 ARGB pixels (32 bytes).
@@ -3633,7 +3887,7 @@ void SplitUVRow_16_NEON(const uint16_t* src_uv,
"+r"(dst_v), // %2
"+r"(width) // %3
: "r"(shift) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+ : "cc", "memory", "q0", "q1", "q2");
}
void MergeUVRow_16_NEON(const uint16_t* src_u,
@@ -3687,31 +3941,25 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
- "vdup.16 q0, %3 \n"
- "1: \n"
- "vld1.16 {q1}, [%0]! \n"
- "vld1.16 {q2}, [%0]! \n"
- "vmovl.u16 q3, d2 \n"
- "vmovl.u16 q1, d3 \n"
- "vmovl.u16 q4, d4 \n"
- "vmovl.u16 q2, d5 \n"
- "vshl.u32 q3, q3, q0 \n"
- "vshl.u32 q4, q4, q0 \n"
- "vshl.u32 q1, q1, q0 \n"
- "vshl.u32 q2, q2, q0 \n"
- "vmovn.u32 d2, q3 \n"
- "vmovn.u32 d3, q1 \n"
- "vmovn.u32 d4, q4 \n"
- "vmovn.u32 d5, q2 \n"
- "vst1.16 {q1}, [%1]! \n"
- "vst1.16 {q2}, [%1]! \n"
+ "vdup.16 d8, %3 \n"
+ "1: \n"
+ "vld1.16 {q2, q3}, [%0]! \n"
+ "vmull.u16 q0, d4, d8 \n"
+ "vmull.u16 q1, d5, d8 \n"
+ "vmull.u16 q2, d6, d8 \n"
+ "vmull.u16 q3, d7, d8 \n"
+ "vshrn.u32 d0, q0, #16 \n"
+ "vshrn.u32 d1, q1, #16 \n"
+ "vshrn.u32 d2, q2, #16 \n"
+ "vshrn.u32 d3, q3, #16 \n"
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(scale) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+ : "cc", "memory", "q0", "q1", "q2", "q3", "d8");
}
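The rewritten DivideRow_16 drops the widen-shift-narrow chain in favor of a widening multiply plus a narrowing shift; per element the new loop computes, in scalar form (sketch):

#include <stdint.h>

static void DivideRow_16_C_Sketch(const uint16_t* src_y, uint16_t* dst_y,
                                  int scale, int width) {
  for (int i = 0; i < width; ++i) {
    // Widen, multiply by the scale factor, keep the high 16 bits.
    dst_y[i] = (uint16_t)(((uint32_t)src_y[i] * (uint32_t)scale) >> 16);
  }
}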
// Use scale to convert lsb formats to msb, depending how many bits there are:
diff --git a/files/source/row_neon64.cc b/source/row_neon64.cc
index 0f120373..1679f87c 100644
--- a/files/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -142,6 +142,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
: "cc", "memory", YUVTORGB_REGS, "v19");
}
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV444 YUVTORGB
+ RGBTORGB8
+ "subs %w[width], %w[width], #8 \n"
+ "st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
+ "b.gt 1b \n"
+ : [src_y] "+r"(src_y), // %[src_y]
+ [src_u] "+r"(src_u), // %[src_u]
+ [src_v] "+r"(src_v), // %[src_v]
+ [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24]
+ [width] "+r"(width) // %[width]
+ : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
+ [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
+ : "cc", "memory", YUVTORGB_REGS);
+}
+
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -627,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 Y's of 16 bits each from the tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.8h,v1.8h}, [%0], %3 \n" // load 16 pixels
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "prfm pldl1keep, [%0, 3584] \n" // 7 tiles of 512b ahead
+ "st1 {v0.8h,v1.8h}, [%1], #32 \n" // store 16 pixels
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
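A scalar equivalent of the 16-bit detile copy (sketch; note the asm receives src_tile_stride * 2 because it advances a byte pointer, while this helper steps a uint16_t pointer by elements):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void DetileRow_16_C_Sketch(const uint16_t* src,
                                  ptrdiff_t src_tile_stride,  // in elements
                                  uint16_t* dst, int width) {
  for (int x = 0; x < width; x += 16) {
    memcpy(dst, src, 16 * sizeof(uint16_t));  // 16 pixels from this tile
    dst += 16;
    src += src_tile_stride;  // hop to the same row of the next tile
  }
}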
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
@@ -651,6 +694,100 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
}
#if LIBYUV_USE_ST2
+// Read 16 Y, 8 UV, and write 8 YUY2
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
+ "prfm pldl1keep, [%0, 1792] \n"
+ "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
+ "prfm pldl1keep, [%1, 1792] \n"
+      "subs        %w3, %w3, #16                 \n"  // 16 pixels per loop
+      "st2         {v0.16b,v1.16b}, [%2], #32    \n"  // store 8 YUY2
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "v0", "v1" // Clobber list
+ );
+}
+#else
+// Read 16 Y, 8 UV, and write 8 YUY2
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
+ "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
+ "subs %w3, %w3, #16 \n"
+ "prfm pldl1keep, [%0, 1792] \n"
+ "zip1 v2.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%1, 1792] \n"
+ "zip2 v3.16b, v0.16b, v1.16b \n"
+ "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list
+ );
+}
+#endif
+
+// Unpack MT2T into tiled P010 64 pixels at a time. See
+// tinyurl.com/mtk-10bit-video-format for format documentation.
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v7.16b}, [%0], #16 \n"
+ "ld1 {v0.16b-v3.16b}, [%0], #64 \n"
+ "shl v4.16b, v7.16b, #6 \n"
+ "shl v5.16b, v7.16b, #4 \n"
+ "shl v6.16b, v7.16b, #2 \n"
+ "subs %2, %2, #80 \n"
+ "zip1 v16.16b, v4.16b, v0.16b \n"
+ "zip1 v18.16b, v5.16b, v1.16b \n"
+ "zip1 v20.16b, v6.16b, v2.16b \n"
+ "zip1 v22.16b, v7.16b, v3.16b \n"
+ "zip2 v17.16b, v4.16b, v0.16b \n"
+ "zip2 v19.16b, v5.16b, v1.16b \n"
+ "zip2 v21.16b, v6.16b, v2.16b \n"
+ "zip2 v23.16b, v7.16b, v3.16b \n"
+ "sri v16.8h, v16.8h, #10 \n"
+ "sri v17.8h, v17.8h, #10 \n"
+ "sri v18.8h, v18.8h, #10 \n"
+ "sri v19.8h, v19.8h, #10 \n"
+ "st1 {v16.8h-v19.8h}, [%1], #64 \n"
+ "sri v20.8h, v20.8h, #10 \n"
+ "sri v21.8h, v21.8h, #10 \n"
+ "sri v22.8h, v22.8h, #10 \n"
+ "sri v23.8h, v23.8h, #10 \n"
+ "st1 {v20.8h-v23.8h}, [%1], #64 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(size) // %2
+ :
+ : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+}
+
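Reading the shifts, zips, and sri back into scalar form: each 80-byte MT2T block carries 16 bytes of packed 2-bit LSBs followed by 64 bytes of 8-bit MSBs, and every output sample is the 10-bit value MSB-aligned in 16 bits with its top bits replicated into the low bits. A sketch under that reading (hypothetical helper, not part of this patch):

#include <stddef.h>
#include <stdint.h>

static void UnpackMT2T_C_Sketch(const uint8_t* src, uint16_t* dst,
                                size_t size) {  // size: multiple of 80 bytes
  for (size_t i = 0; i < size; i += 80) {
    const uint8_t* lo = src;       // 16 bytes; 4 x 2-bit LSB fields per byte
    const uint8_t* hi = src + 16;  // 64 bytes of 8-bit MSBs
    for (int r = 0; r < 4; ++r) {
      for (int j = 0; j < 16; ++j) {
        uint16_t v10 = (uint16_t)((hi[r * 16 + j] << 2) |
                                  ((lo[j] >> (2 * r)) & 3));
        *dst++ = (uint16_t)((v10 << 6) | (v10 >> 4));  // MSB-align + replicate
      }
    }
    src += 80;
  }
}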
+#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@@ -1729,6 +1866,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
);
}
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
+ "ld2 {v2.16b,v3.16b}, [%1], #32 \n" // load next row
+ "urhadd v4.16b, v1.16b, v3.16b \n" // average rows of UV
+ "prfm pldl1keep, [%0, 448] \n"
+ "st1 {v4.16b}, [%2], #16 \n" // store 8 UV.
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(src_yuy2b), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
@@ -1819,24 +1979,23 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
- "dup v1.4s, %w2 \n" // dither4
+ "dup v1.4s, %w3 \n" // dither4
"1: \n"
- "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // load 8
- // pixels
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"uqadd v16.8b, v16.8b, v1.8b \n"
"prfm pldl1keep, [%0, 448] \n"
"uqadd v17.8b, v17.8b, v1.8b \n"
"uqadd v18.8b, v18.8b, v1.8b \n" ARGBTORGB565
- "st1 {v18.16b}, [%0], #16 \n" // store 8 pixels RGB565.
+ "st1 {v18.16b}, [%1], #16 \n" // store 8 pixels RGB565.
"b.gt 1b \n"
- : "+r"(dst_rgb) // %0
- : "r"(src_argb), // %1
- "r"(dither4), // %2
- "r"(width) // %3
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb), // %1
+ "+r"(width) // %2
+ : "r"(dither4) // %3
: "cc", "memory", "v1", "v16", "v17", "v18", "v19");
}
@@ -2039,19 +2198,26 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
);
}
+struct RgbUVConstants {
+ uint8_t kRGBToU[4];
+ uint8_t kRGBToV[4];
+};
+
// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
+void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width,
+ const struct RgbUVConstants* rgbuvconstants) {
asm volatile(
- "movi v24.8b, #112 \n" // UB / VR 0.875
- // coefficient
- "movi v25.8b, #74 \n" // UG -0.5781 coefficient
- "movi v26.8b, #38 \n" // UR -0.2969 coefficient
- "movi v27.8b, #18 \n" // VB -0.1406 coefficient
- "movi v28.8b, #94 \n" // VG -0.7344 coefficient
- "movi v29.16b,#0x80 \n" // 128.5
+ "ldr d0, [%4] \n" // load rgbuvconstants
+ "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient
+ "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient
+ "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient
+ "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient
+ "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient
+ "movi v29.16b, #0x80 \n" // 128.5
+
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
"subs %w3, %w3, #8 \n" // 8 processed per loop.
@@ -2070,15 +2236,53 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
"b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ : "r"(rgbuvconstants) // %4
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26",
"v27", "v28", "v29");
}
+// RGB to bt601 coefficients
+// UB 0.875 coefficient = 112
+// UG -0.5781 coefficient = 74
+// UR -0.2969 coefficient = 38
+// VB -0.1406 coefficient = 18
+// VG -0.7344 coefficient = 94
+// VR 0.875 coefficient = 112 (ignored)
+
+static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
+ {18, 94, 112, 0}};
+
+// RGB to JPeg coefficients
+// UB coeff 0.500 = 127
+// UG coeff -0.33126 = 84
+// UR coeff -0.16874 = 43
+// VB coeff -0.08131 = 20
+// VG coeff -0.41869 = 107
+// VR coeff 0.500 = 127 (ignored)
+
+static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
+ {20, 107, 127, 0}};
+
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+ &kRgb24I601UVConstants);
+}
+
+void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
+ &kRgb24JPegUVConstants);
+}
+
#define RGBTOUV_SETUP_REG \
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
@@ -2144,6 +2348,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
+// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -2189,6 +2394,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width) {
+ const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
+ asm volatile (
+ "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
+ "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
+ "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
+ "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
+ "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
+ "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
+ "prfm pldl1keep, [%0, 448] \n"
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
+ "prfm pldl1keep, [%1, 448] \n"
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ RGBTOUV(v2.8h, v1.8h, v0.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_abgr_1), // %1
+ "+r"(dst_uj), // %2
+ "+r"(dst_vj), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -2738,34 +2988,8 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
- uint16_t pad;
};
-// RGB to JPeg coefficients
-// B * 0.1140 coefficient = 29
-// G * 0.5870 coefficient = 150
-// R * 0.2990 coefficient = 77
-// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
- 128,
- 0};
-
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
-
-// RGB to BT.601 coefficients
-// B * 0.1016 coefficient = 25
-// G * 0.5078 coefficient = 129
-// R * 0.2578 coefficient = 66
-// Add 16.5 = 0x1080
-
-static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
- 0x1080,
- 0};
-
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
- 0x1080,
- 0};
-
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
uint8_t* dst_y,
@@ -2800,6 +3024,26 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
"v17");
}
+// RGB to JPeg coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
+
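As with the UV constants earlier, each Y row kernel is one fixed-point dot product per pixel; in scalar form (a sketch using the constants above):

#include <stdint.h>

static uint8_t RGBToY601(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);  // +16.5 bias
}
static uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((77 * r + 150 * g + 29 * b + 128) >> 8);  // rounding only
}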
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants);
}
@@ -2812,6 +3056,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
@@ -3193,6 +3441,8 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
+ "movi v7.8h, #0x00ff \n" // 255 for rounding up
+
// Attenuate 8 pixels.
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
@@ -3201,16 +3451,16 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
"prfm pldl1keep, [%0, 448] \n"
"umull v5.8h, v1.8b, v3.8b \n" // g * a
"umull v6.8h, v2.8b, v3.8b \n" // r * a
- "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
- "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
- "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
+ "addhn v0.8b, v4.8h, v7.8h \n" // (b + 255) >> 8
+ "addhn v1.8b, v5.8h, v7.8h \n" // (g + 255) >> 8
+ "addhn v2.8b, v6.8h, v7.8h \n" // (r + 255) >> 8
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
// Quantize 8 ARGB pixels (32 bytes).
@@ -3751,6 +4001,86 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "v1", "v2", "v3");
}
+// Convert FP16 Half Floats to FP32 Floats
+void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16
+ float* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v1.8h}, [%0], #16 \n" // load 8 halffloats
+ "subs %w2, %w2, #8 \n" // 8 floats per loop
+ "prfm pldl1keep, [%0, 448] \n"
+ "fcvtl v2.4s, v1.4h \n" // 8 floats
+ "fcvtl2 v3.4s, v1.8h \n"
+ "stp q2, q3, [%1], #32 \n" // store 8 floats
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
+// Convert FP16 Half Floats to FP32 Floats
+// Read a column and write a row
+void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16
+ int src_stride, // stride in elements
+ float* dst,
+ int width) {
+ asm volatile(
+      "cmp         %w2, #8                       \n"  // Are there 8 rows?
+ "b.lo 2f \n"
+ "1: \n"
+ "ld1 {v0.h}[0], [%0], %3 \n" // load 8 halffloats
+ "ld1 {v0.h}[1], [%0], %3 \n"
+ "ld1 {v0.h}[2], [%0], %3 \n"
+ "ld1 {v0.h}[3], [%0], %3 \n"
+ "ld1 {v1.h}[0], [%0], %3 \n"
+ "ld1 {v1.h}[1], [%0], %3 \n"
+ "ld1 {v1.h}[2], [%0], %3 \n"
+ "ld1 {v1.h}[3], [%0], %3 \n"
+ "subs %w2, %w2, #8 \n" // 8 rows per loop
+ "prfm pldl1keep, [%0, 448] \n"
+ "fcvtl v2.4s, v0.4h \n" // 4 floats
+ "fcvtl v3.4s, v1.4h \n" // 4 more floats
+ "stp q2, q3, [%1], #32 \n" // store 8 floats
+ "b.gt 1b \n"
+ "cmp %w2, #1 \n" // Is there 1 value?
+ "b.lo 3f \n"
+ "2: \n"
+      "ld1         {v1.h}[0], [%0], %3           \n"  // load 1 halffloat
+      "subs        %w2, %w2, #1                  \n"  // 1 float per loop
+      "fcvtl       v2.4s, v1.4h                  \n"  // 1 float
+      "str         s2, [%1], #4                  \n"  // store 1 float
+ "b.gt 2b \n"
+ "3: \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"((ptrdiff_t)(src_stride * 2)) // %3
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+// Convert FP32 Floats to FP16 Half Floats
+void ConvertFP32ToFP16Row_NEON(const float* src,
+ uint16_t* dst, // fp16
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ldp q2, q3, [%0], #32 \n" // load 8 floats
+ "subs %w2, %w2, #8 \n" // 8 floats per loop
+ "prfm pldl1keep, [%0, 448] \n"
+ "fcvtn v1.4h, v2.4s \n" // 8 fp16 halffloats
+ "fcvtn2 v1.8h, v3.4s \n"
+ "str q1, [%1], #16 \n" // store 8 fp16 halffloats
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
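fcvtl/fcvtl2 and fcvtn/fcvtn2 are the hardware half/single conversions; a scalar sketch of the row conversion, assuming a compiler with ARM __fp16 support (true of AArch64 clang and gcc; helper name is illustrative):

#include <stdint.h>
#include <string.h>

static void ConvertFP16ToFP32Row_C_Sketch(const uint16_t* src, float* dst,
                                          int width) {
  for (int i = 0; i < width; ++i) {
    __fp16 h;
    memcpy(&h, &src[i], sizeof(h));  // reinterpret the 16-bit pattern as fp16
    dst[i] = (float)h;               // widen to fp32
  }
}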
float ScaleMaxSamples_NEON(const float* src,
float* dst,
float scale,
@@ -4241,23 +4571,19 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
- "dup v0.8h, %w3 \n"
+ "dup v4.8h, %w3 \n"
"1: \n"
- "ldp q1, q2, [%0], #32 \n"
- "ushll v3.4s, v1.4h, #0 \n"
- "ushll v4.4s, v2.4h, #0 \n"
+ "ldp q2, q3, [%0], #32 \n"
+ "umull v0.4s, v2.4h, v4.4h \n"
+ "umull2 v1.4s, v2.8h, v4.8h \n"
+ "umull v2.4s, v3.4h, v4.4h \n"
+ "umull2 v3.4s, v3.8h, v4.8h \n"
"prfm pldl1keep, [%0, 448] \n"
- "ushll2 v1.4s, v1.8h, #0 \n"
- "ushll2 v2.4s, v2.8h, #0 \n"
- "mul v3.4s, v0.4s, v3.4s \n"
- "mul v4.4s, v0.4s, v4.4s \n"
- "mul v1.4s, v0.4s, v1.4s \n"
- "mul v2.4s, v0.4s, v2.4s \n"
- "shrn v3.4h, v3.4s, #16 \n"
- "shrn v4.4h, v4.4s, #16 \n"
- "shrn2 v3.8h, v1.4s, #16 \n"
- "shrn2 v4.8h, v2.4s, #16 \n"
- "stp q3, q3, [%1], #32 \n" // store 16 pixels
+ "shrn v0.4h, v0.4s, #16 \n"
+ "shrn2 v0.8h, v1.4s, #16 \n"
+ "shrn v1.4h, v2.4s, #16 \n"
+ "shrn2 v1.8h, v3.4s, #16 \n"
+ "stp q0, q1, [%1], #32 \n" // store 16 pixels
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
"b.gt 1b \n"
: "+r"(src_y), // %0
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
new file mode 100644
index 00000000..0bf2bef6
--- /dev/null
+++ b/source/row_rvv.cc
@@ -0,0 +1,1394 @@
+/*
+ * Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Copyright (c) 2023 SiFive, Inc. All rights reserved.
+ *
+ * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
+ * Contributed by Bruce Lai <bruce.lai@sifive.com>
+ */
+
+#include "libyuv/row.h"
+
+// This module is for clang RVV. GCC does not yet support segment load & store.
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
+ defined(__clang__)
+#include <assert.h>
+#include <riscv_vector.h>
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Fill YUV -> RGB conversion constants into vectors
+// NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+// register) is set to round-to-nearest-up mode (0).
+#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
+ { \
+ asm volatile("csrwi vxrm, 0"); \
+ ub = yuvconst->kUVCoeff[0]; \
+ vr = yuvconst->kUVCoeff[1]; \
+ ug = yuvconst->kUVCoeff[2]; \
+ vg = yuvconst->kUVCoeff[3]; \
+ yg = yuvconst->kRGBCoeffBias[0]; \
+ bb = yuvconst->kRGBCoeffBias[1] + 32; \
+ bg = yuvconst->kRGBCoeffBias[2] - 32; \
+ br = yuvconst->kRGBCoeffBias[3] + 32; \
+ }
+
+// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422
+#define READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
+ { \
+ vuint8m1_t v_tmp0, v_tmp1; \
+ vuint8m2_t v_y; \
+ vuint16m2_t v_u_16, v_v_16; \
+ vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
+ v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \
+ v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
+ v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \
+ v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
+ v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
+ v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
+ v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
+ v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
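The vwaddu/vmul pair above is a byte-replication idiom: zero-extend to 16 bits, then multiply by 0x0101 to get (x << 8) | x, so each 4:2:2 chroma byte fills both halves of its 16-bit lane before the vector is reinterpreted back to bytes, upsampling U and V to one sample per pixel. The same multiply appears later (e.g. in ARGBToAR64Row_RVV) as an exact 8-to-16-bit range expansion, since 255 * 0x0101 == 65535. In scalar terms (sketch):

#include <stdint.h>

static uint16_t Dup8To16(uint8_t x) {
  return (uint16_t)(x * 0x0101);  // e.g. 0x37 -> 0x3737, i.e. (x << 8) | x
}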
+// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444
+#define READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
+ { \
+ vuint8m2_t v_y; \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_u = __riscv_vle8_v_u8m2(src_u, vl); \
+ v_v = __riscv_vle8_v_u8m2(src_v, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
+// Convert from YUV to fixed-point RGB
+#define YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, \
+ v_b_16, v_r_16) \
+ { \
+ vuint16m4_t v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4; \
+ vuint32m8_t v_tmp5; \
+ v_tmp0 = __riscv_vwmulu_vx_u16m4(v_u, ug, vl); \
+ v_y_16 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); \
+ v_tmp0 = __riscv_vwmaccu_vx_u16m4(v_tmp0, vg, v_v, vl); \
+ v_tmp1 = __riscv_vwmulu_vx_u16m4(v_u, ub, vl); \
+ v_tmp5 = __riscv_vwmulu_vx_u32m8(v_y_16, yg, vl); \
+ v_tmp2 = __riscv_vnsrl_wx_u16m4(v_tmp5, 16, vl); \
+ v_tmp3 = __riscv_vadd_vx_u16m4(v_tmp2, bg, vl); \
+ v_tmp4 = __riscv_vadd_vv_u16m4(v_tmp2, v_tmp1, vl); \
+ v_tmp2 = __riscv_vwmaccu_vx_u16m4(v_tmp2, vr, v_v, vl); \
+ v_g_16 = __riscv_vssubu_vv_u16m4(v_tmp3, v_tmp0, vl); \
+ v_b_16 = __riscv_vssubu_vx_u16m4(v_tmp4, bb, vl); \
+ v_r_16 = __riscv_vssubu_vx_u16m4(v_tmp2, br, vl); \
+ }
+
+// Convert from fixed-point RGB to 8-bit RGB
+#define RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r) \
+ { \
+ v_g = __riscv_vnclipu_wx_u8m2(v_g_16, 6, vl); \
+ v_b = __riscv_vnclipu_wx_u8m2(v_b_16, 6, vl); \
+ v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl); \
+ }
+
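RGBTORGB8's vnclipu does the final fixed-point-to-byte step: with vxrm set to round-to-nearest-up in YUVTORGB_SETUP, each lane is a rounding narrowing right shift with unsigned saturation. Per-lane sketch:

#include <stdint.h>

// What __riscv_vnclipu_wx(x, 6, vl) computes per lane when vxrm == 0.
static uint8_t Narrow6(uint16_t x) {
  uint32_t y = ((uint32_t)x + 32) >> 6;  // add half an LSB, then shift
  return (uint8_t)(y > 255 ? 255 : y);   // saturate to 8 bits
}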
+// Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv
+#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \
+ { \
+ vuint8m1_t v_tmp0, v_tmp1; \
+ vuint8m2_t v_y; \
+ vuint16m2_t v_u_16, v_v_16; \
+ vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
+ __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_uv, vl); \
+ v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
+ v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
+ v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
+ v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
+ v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
+ v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
+// Read 2*[VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_vu
+#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16) \
+ { \
+ vuint8m1_t v_tmp0, v_tmp1; \
+ vuint8m2_t v_y; \
+ vuint16m2_t v_u_16, v_v_16; \
+ vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
+ __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \
+ v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
+ v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
+ v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
+ v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
+ v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
+ v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
+#ifdef HAS_ARGBTOAR64ROW_RVV
+void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
+ size_t avl = (size_t)4 * width;
+ do {
+ vuint16m8_t v_ar64;
+ vuint8m4_t v_argb;
+ size_t vl = __riscv_vsetvl_e8m4(avl);
+ v_argb = __riscv_vle8_v_u8m4(src_argb, vl);
+ v_ar64 = __riscv_vwaddu_vx_u16m8(v_argb, 0, vl);
+ v_ar64 = __riscv_vmul_vx_u16m8(v_ar64, 0x0101, vl);
+ __riscv_vse16_v_u16m8(dst_ar64, v_ar64, vl);
+ avl -= vl;
+ src_argb += vl;
+ dst_ar64 += vl;
+ } while (avl > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOAB64ROW_RVV
+void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
+ size_t avl = (size_t)width;
+ do {
+ vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m1(avl);
+ __riscv_vlseg4e8_v_u8m1(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ v_b_16 = __riscv_vwaddu_vx_u16m2(v_b, 0, vl);
+ v_g_16 = __riscv_vwaddu_vx_u16m2(v_g, 0, vl);
+ v_r_16 = __riscv_vwaddu_vx_u16m2(v_r, 0, vl);
+ v_a_16 = __riscv_vwaddu_vx_u16m2(v_a, 0, vl);
+ v_b_16 = __riscv_vmul_vx_u16m2(v_b_16, 0x0101, vl);
+ v_g_16 = __riscv_vmul_vx_u16m2(v_g_16, 0x0101, vl);
+ v_r_16 = __riscv_vmul_vx_u16m2(v_r_16, 0x0101, vl);
+ v_a_16 = __riscv_vmul_vx_u16m2(v_a_16, 0x0101, vl);
+ __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r_16, v_g_16, v_b_16, v_a_16, vl);
+ avl -= vl;
+ src_argb += 4 * vl;
+ dst_ab64 += 4 * vl;
+ } while (avl > 0);
+}
+#endif
+
+#ifdef HAS_AR64TOARGBROW_RVV
+void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
+ size_t avl = (size_t)4 * width;
+ do {
+ vuint16m8_t v_ar64;
+ vuint8m4_t v_argb;
+ size_t vl = __riscv_vsetvl_e16m8(avl);
+ v_ar64 = __riscv_vle16_v_u16m8(src_ar64, vl);
+ v_argb = __riscv_vnsrl_wx_u8m4(v_ar64, 8, vl);
+ __riscv_vse8_v_u8m4(dst_argb, v_argb, vl);
+ avl -= vl;
+ src_ar64 += vl;
+ dst_argb += vl;
+ } while (avl > 0);
+}
+#endif
+
+#ifdef HAS_AR64TOAB64ROW_RVV
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64,
+ uint16_t* dst_ab64,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e16m2(w);
+ vuint16m2_t v_b, v_g, v_r, v_a;
+ __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl);
+ __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl);
+ w -= vl;
+ src_ar64 += vl * 4;
+ dst_ab64 += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_AB64TOARGBROW_RVV
+void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
+ size_t avl = (size_t)width;
+ do {
+ vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e16m2(avl);
+ __riscv_vlseg4e16_v_u16m2(&v_r_16, &v_g_16, &v_b_16, &v_a_16, src_ab64, vl);
+ v_b = __riscv_vnsrl_wx_u8m1(v_b_16, 8, vl);
+ v_g = __riscv_vnsrl_wx_u8m1(v_g_16, 8, vl);
+ v_r = __riscv_vnsrl_wx_u8m1(v_r_16, 8, vl);
+ v_a = __riscv_vnsrl_wx_u8m1(v_a_16, 8, vl);
+ __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl);
+ avl -= vl;
+ src_ab64 += 4 * vl;
+ dst_argb += 4 * vl;
+ } while (avl > 0);
+}
+#endif
+
+#ifdef HAS_RAWTOARGBROW_RVV
+void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RAWTORGBAROW_RVV
+void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_rgba += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RAWTORGB24ROW_RVV
+void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORAWROW_RVV
+void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_raw += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORGB24ROW_RVV
+void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOABGRROW_RVV
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_abgr += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOBGRAROW_RVV
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_bgra += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORGBAROW_RVV
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_rgba += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGBATOARGBROW_RVV
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_rgba += vl * 4;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGB24TOARGBROW_RVV
+void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_rgb24 += vl * 3;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I444TOARGBROW_RVV
+void I444ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I444ALPHATOARGBROW_RVV
+void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I444TORGB24ROW_RVV
+void I444ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I422TOARGBROW_RVV
+void I422ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I422ALPHATOARGBROW_RVV
+void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I422TORGBAROW_RVV
+void I422ToRGBARow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_rgba += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I422TORGB24ROW_RVV
+void I422ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_I400TOARGBROW_RVV
+void I400ToARGBRow_RVV(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ const bool is_yb_positive = (yuvconstants->kRGBCoeffBias[4] >= 0);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ vuint16m4_t v_yb;
+ vuint16m4_t v_yg = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[0], vl);
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode (0).
+ asm volatile("csrwi vxrm, 0");
+ if (is_yb_positive) {
+ v_yb = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[4] - 32, vl);
+ } else {
+ v_yb = __riscv_vmv_v_x_u16m4(-yuvconstants->kRGBCoeffBias[4] + 32, vl);
+ }
+ do {
+ vuint8m2_t v_y, v_out;
+ vuint16m4_t v_y_16, v_tmp0, v_tmp1, v_tmp2;
+ vl = __riscv_vsetvl_e8m2(w);
+ v_y = __riscv_vle8_v_u8m2(src_y, vl);
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);
+ v_tmp0 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); // 257 * v_y
+ v_tmp1 = __riscv_vmulhu_vv_u16m4(v_tmp0, v_yg, vl);
+ if (is_yb_positive) {
+ v_tmp2 = __riscv_vsaddu_vv_u16m4(v_tmp1, v_yb, vl);
+ } else {
+ v_tmp2 = __riscv_vssubu_vv_u16m4(v_tmp1, v_yb, vl);
+ }
+ v_out = __riscv_vnclipu_wx_u8m2(v_tmp2, 6, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_out, v_out, v_out, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_J400TOARGBROW_RVV
+void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_y;
+ v_y = __riscv_vle8_v_u8m2(src_y, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_y, v_y, v_y, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_COPYROW_RVV
+void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(w);
+ vuint8m8_t v_data = __riscv_vle8_v_u8m8(src, vl);
+ __riscv_vse8_v_u8m8(dst, v_data, vl);
+ w -= vl;
+ src += vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV12TOARGBROW_RVV
+void NV12ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_uv += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV12TORGB24ROW_RVV
+void NV12ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_uv,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_uv += vl;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV21TOARGBROW_RVV
+void NV21ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_vu += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_NV21TORGB24ROW_RVV
+void NV21ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_vu += vl;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+// Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1
+
+#ifdef HAS_INTERPOLATEROW_RVV
+void InterpolateRow_RVV(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ int y0_fraction = 256 - y1_fraction;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ size_t dst_w = (size_t)dst_width;
+ assert(source_y_fraction >= 0);
+ assert(source_y_fraction < 256);
+ // Blend 100 / 0 - Copy row unchanged.
+ if (y1_fraction == 0) {
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(dst_w);
+ __riscv_vse8_v_u8m8(dst_ptr, __riscv_vle8_v_u8m8(src_ptr, vl), vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+ return;
+ }
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode (0).
+ asm volatile("csrwi vxrm, 0");
+ // Blend 50 / 50.
+ if (y1_fraction == 128) {
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(dst_w);
+ vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl);
+ vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl);
+ // Use round-to-nearest-up mode for averaging add
+ vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl);
+ __riscv_vse8_v_u8m8(dst_ptr, row_out, vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ src_ptr1 += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+ return;
+ }
+ // General purpose row blend.
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(dst_w);
+ vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl);
+ vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl);
+ vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl);
+ acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl);
+ // Use round-to-nearest-up mode for vnclip
+ __riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ src_ptr1 += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+}
+#endif
+
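The general-purpose branch is the standard 8.8 fixed-point row blend; per byte it computes the following (sketch, with the +128 rounding supplied by the round-to-nearest vnclipu above):

#include <stdint.h>

static uint8_t BlendPixel(uint8_t row0, uint8_t row1, int y1_fraction) {
  return (uint8_t)((row0 * (256 - y1_fraction) + row1 * y1_fraction + 128) >>
                   8);
}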
+#ifdef HAS_SPLITRGBROW_RVV
+void SplitRGBRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_rgb, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_rgb += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_MERGERGBROW_RVV
+void MergeRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ dst_rgb += vl * 3;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SPLITARGBROW_RVV
+void SplitARGBRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ uint8_t* dst_a,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vse8_v_u8m2(dst_a, v_a, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_a += vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_MERGEARGBROW_RVV
+void MergeARGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ vuint8m2_t v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ src_a += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SPLITXRGBROW_RVV
+void SplitXRGBRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_MERGEXRGBROW_RVV
+void MergeXRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_r, v_g, v_b;
+ v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SPLITUVROW_RVV
+void SplitUVRow_RVV(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ vuint8m4_t v_u, v_v;
+ __riscv_vlseg2e8_v_u8m4(&v_u, &v_v, src_uv, vl);
+ __riscv_vse8_v_u8m4(dst_u, v_u, vl);
+ __riscv_vse8_v_u8m4(dst_v, v_v, vl);
+ w -= vl;
+ dst_u += vl;
+ dst_v += vl;
+ src_uv += 2 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_MERGEUVROW_RVV
+void MergeUVRow_RVV(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m4_t v_u, v_v;
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ v_u = __riscv_vle8_v_u8m4(src_u, vl);
+ v_v = __riscv_vle8_v_u8m4(src_v, vl);
+ __riscv_vsseg2e8_v_u8m4(dst_uv, v_u, v_v, vl);
+ w -= vl;
+ src_u += vl;
+ src_v += vl;
+ dst_uv += 2 * vl;
+ } while (w > 0);
+}
+#endif
+
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
+
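All four constant sets feed the same 8.8 fixed-point dot product used by the matrix-row functions below; a scalar sketch, using the RgbConstants struct above:

#include <stdint.h>

// y = (c[0]*b + c[1]*g + c[2]*r + kAddY) >> 8. With kRgb24I601Constants this
// is (25*B + 129*G + 66*R + 0x1080) >> 8: BT.601 studio-swing luma with the
// +16 offset and the 0.5 rounding term folded into kAddY.
static inline uint8_t RGBToY_Scalar(uint8_t b, uint8_t g, uint8_t r,
                                    const struct RgbConstants* c) {
  return (uint8_t)((c->kRGBToY[0] * b + c->kRGBToY[1] * g +
                    c->kRGBToY[2] * r + c->kAddY) >>
                   8);
}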
+// ARGB expects the first 3 values to contain RGB; the 4th value is ignored.
+#ifdef HAS_ARGBTOYMATRIXROW_RVV
+void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors holding the RGBToY constants
+  vuint16m4_t v_addy;           // vector holding kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_argb += 4 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOYROW_RVV
+void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+#endif
+
+#ifdef HAS_ARGBTOYJROW_RVV
+void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+#endif
+
+#ifdef HAS_ABGRTOYROW_RVV
+void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants);
+}
+#endif
+
+#ifdef HAS_ABGRTOYJROW_RVV
+void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+#endif
+
+// RGBA expects the first value to be A (ignored), followed by 3 values containing RGB.
+#ifdef HAS_RGBATOYMATRIXROW_RVV
+void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors holding the RGBToY constants
+  vuint16m4_t v_addy;           // vector holding kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_rgba += 4 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGBATOYROW_RVV
+void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+#endif
+
+#ifdef HAS_RGBATOYJROW_RVV
+void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+#endif
+
+#ifdef HAS_BGRATOYROW_RVV
+void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants);
+}
+#endif
+
+#ifdef HAS_RGBTOYMATRIXROW_RVV
+void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors holding the RGBToY constants
+  vuint16m4_t v_addy;           // vector holding kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_rgb += 3 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGB24TOYJROW_RVV
+void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+#endif
+
+#ifdef HAS_RAWTOYJROW_RVV
+void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+#endif
+
+#ifdef HAS_RGB24TOYROW_RVV
+void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+#endif
+
+#ifdef HAS_RAWTOYROW_RVV
+void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
+}
+#endif
+
+// Blend src_argb over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb or src_argb1.
+// src_argb: RGB values have already been pre-multiplied by the alpha.
+#ifdef HAS_ARGBBLENDROW_RVV
+void ARGBBlendRow_RVV(const uint8_t* src_argb,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvlmax_e8m2();
+ // clamp255((((256 - a) * b) >> 8) + f)
+ // = b * (256 - a) / 256 + f
+ // = b - (b * a / 256) + f
+ vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl);
+ do {
+ vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a;
+ vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a;
+ vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r;
+ vuint8m2_t v_dst_b, v_dst_g, v_dst_r;
+ vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a,
+ src_argb, vl);
+ __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a,
+ src_argb1, vl);
+
+ v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl);
+ v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl);
+ v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl);
+
+ v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl);
+ v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl);
+ v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl);
+
+ v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl);
+ v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl);
+ v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl);
+
+ w -= vl;
+ src_argb += 4 * vl;
+ src_argb1 += 4 * vl;
+ dst_argb += 4 * vl;
+ } while (w > 0);
+}
+#endif
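A scalar model of one color channel of the blend above, assuming src_argb is premultiplied as the comment states. vmulhu yields the high byte of the widened product, i.e. (b * a) >> 8:

#include <stdint.h>

// dst = clamp255(b - (b * a) / 256 + f), per channel.
static inline uint8_t BlendChannel(uint8_t f, uint8_t b, uint8_t a) {
  uint8_t t = (uint8_t)(((uint16_t)b * a) >> 8);  // vmulhu: (b * a) >> 8
  uint16_t sum = (uint16_t)(b - t) + f;           // vsub, widened for the clamp
  return (uint8_t)(sum > 255 ? 255 : sum);        // vsaddu: saturating add
}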
+
+#ifdef HAS_BLENDPLANEROW_RVV
+void BlendPlaneRow_RVV(const uint8_t* src0,
+ const uint8_t* src1,
+ const uint8_t* alpha,
+ uint8_t* dst,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint16m8_t v_dst_u16;
+ vuint8m4_t v_dst;
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl);
+ vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl);
+ vuint8m4_t v_alpha = __riscv_vle8_v_u8m4(alpha, vl);
+ vuint8m4_t v_255_minus_alpha = __riscv_vrsub_vx_u8m4(v_alpha, 255u, vl);
+
+    // dst = (a * foreground + (255 - a) * background + 255) >> 8
+ v_dst_u16 = __riscv_vwmulu_vv_u16m8(v_alpha, v_src0, vl);
+ v_dst_u16 =
+ __riscv_vwmaccu_vv_u16m8(v_dst_u16, v_255_minus_alpha, v_src1, vl);
+ v_dst_u16 = __riscv_vadd_vx_u16m8(v_dst_u16, 255u, vl);
+ v_dst = __riscv_vnsrl_wx_u8m4(v_dst_u16, 8, vl);
+
+ __riscv_vse8_v_u8m4(dst, v_dst, vl);
+ w -= vl;
+ src0 += vl;
+ src1 += vl;
+ alpha += vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
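The same widen/accumulate/narrow shape as InterpolateRow above, but weighted by a per-pixel alpha plane; a scalar sketch of one output byte:

#include <stdint.h>

// dst = (a * src0 + (255 - a) * src1 + 255) >> 8. The +255 bias makes
// a == 255 select src0 exactly and a == 0 select src1 exactly.
static inline uint8_t BlendPlanePixel(uint8_t s0, uint8_t s1, uint8_t a) {
  return (uint8_t)((a * s0 + (255 - a) * s1 + 255) >> 8);
}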
+
+// Attenuate: (f * a + 255) >> 8
+#ifdef HAS_ARGBATTENUATEROW_RVV
+void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ // f * a
+ v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
+ v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
+ v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
+ // f * a + 255
+ v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl);
+ v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl);
+ v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl);
+ // (f * a + 255) >> 8
+ v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl);
+ v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl);
+ v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBEXTRACTALPHAROW_RVV
+void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_a,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vse8_v_u8m2(dst_a, v_a, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_a += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_RVV
+void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
+ size_t w = (size_t)width;
+ const ptrdiff_t dst_stride = 4;
+ dst += 3;
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(w);
+ vuint8m8_t v_a = __riscv_vle8_v_u8m8(src, vl);
+ __riscv_vsse8_v_u8m8(dst, dst_stride, v_a, vl);
+ w -= vl;
+ src += vl;
+ dst += vl * dst_stride;
+ } while (w > 0);
+}
+#endif
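The vsse8 strided store writes one byte per four-byte pixel; with the dst += 3 offset above, that byte is the alpha channel of each ARGB pixel. A scalar equivalent:

#include <stdint.h>

// Copy a Y plane row into the alpha (4th) byte of each ARGB pixel.
static void CopyYToAlpha_Scalar(const uint8_t* src, uint8_t* dst_argb,
                                int width) {
  for (int i = 0; i < width; ++i) {
    dst_argb[4 * i + 3] = src[i];
  }
}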
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
+ // defined(__clang__)
diff --git a/files/source/row_win.cc b/source/row_win.cc
index c7c1ff60..5fb28521 100644
--- a/files/source/row_win.cc
+++ b/source/row_win.cc
@@ -14,7 +14,9 @@
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
-#if defined(_M_X64)
+#if defined(_M_ARM64EC)
+#include <intrin.h>
+#elif defined(_M_X64)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
@@ -893,7 +895,7 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb,
__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
__asm {
@@ -940,7 +942,7 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -2789,6 +2791,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3(
}
}
+// 8 pixels.
+// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes).
+__declspec(naked) void I444ToRGB24Row_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
+
+ convertloop:
+ READYUV444
+ YUVTORGB(ebx)
+ STORERGB24
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
// 8 pixels
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
__declspec(naked) void I422ToRGB565Row_SSSE3(
@@ -3423,17 +3463,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
sub edx, eax
convertloop:
- vmovdqu ymm0, [eax] // read 32 U's
- vmovdqu ymm1, [eax + edx] // and 32 V's
- lea eax, [eax + 32]
- vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
- vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
- vextractf128 [edi], ymm2, 0 // bytes 0..15
- vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
- vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
- vextractf128 [edi + 48], ymm0, 1 // bytes 47..63
- lea edi, [edi + 64]
- sub ecx, 32
+ vpmovzxbw ymm0, [eax]
+ vpmovzxbw ymm1, [eax + edx]
+ lea eax, [eax + 16]
+ vpsllw ymm1, ymm1, 8
+ vpor ymm2, ymm1, ymm0
+ vmovdqu [edi], ymm2
+ lea edi, [edi + 32]
+ sub ecx, 16
jg convertloop
pop edi
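The rewritten MergeUVRow_AVX2 loop replaces the unpack/extract interleave with a zero-extend/shift/or sequence. An illustrative C intrinsics rendering of the new inner loop (the library code itself is inline assembly; this sketch assumes width is a multiple of 16):

#include <immintrin.h>
#include <stdint.h>

static void MergeUVRow_AVX2_Sketch(const uint8_t* src_u, const uint8_t* src_v,
                                   uint8_t* dst_uv, int width) {
  for (int x = 0; x < width; x += 16) {
    // vpmovzxbw: 16 bytes zero-extended to 16 words each.
    __m256i u = _mm256_cvtepu8_epi16(
        _mm_loadu_si128((const __m128i*)(src_u + x)));
    __m256i v = _mm256_cvtepu8_epi16(
        _mm_loadu_si128((const __m128i*)(src_v + x)));
    // vpsllw + vpor: V into the high byte of each word -> interleaved UV.
    __m256i uv = _mm256_or_si256(_mm256_slli_epi16(v, 8), u);
    _mm256_storeu_si256((__m256i*)(dst_uv + 2 * x), uv);  // vmovdqu
  }
}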
diff --git a/files/source/scale.cc b/source/scale.cc
index e1335f1e..b7a602ba 100644
--- a/files/source/scale.cc
+++ b/source/scale.cc
@@ -135,6 +135,14 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
+#if defined(HAS_SCALEROWDOWN2_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowDown2 = filtering == kFilterNone
+ ? ScaleRowDown2_RVV
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
+ : ScaleRowDown2Box_RVV);
+ }
+#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -198,6 +206,51 @@ static void ScalePlaneDown2_16(int src_width,
}
}
+void ScalePlaneDown2_16To8(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width, int scale) =
+ (src_width & 1)
+ ? (filtering == kFilterNone
+ ? ScaleRowDown2_16To8_Odd_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
+ : ScaleRowDown2Box_16To8_Odd_C))
+ : (filtering == kFilterNone
+ ? ScaleRowDown2_16To8_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
+ : ScaleRowDown2Box_16To8_C));
+ int row_stride = src_stride * 2;
+ (void)dst_height;
+ if (!filtering) {
+ src_ptr += src_stride; // Point to odd rows.
+ src_stride = 0;
+ }
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (y = 0; y < src_height / 2; ++y) {
+ ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+ if (src_height & 1) {
+ if (!filtering) {
+ src_ptr -= src_stride; // Point to last row.
+ }
+ ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
+ }
+}
+
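The new 16-to-8 path threads a scale argument through to its row kernels. A hedged scalar sketch of the kFilterNone kernel, assuming the C reference kernels map each 16-bit value v to clamp255((v * scale) >> 16) and sample odd pixels as the unfiltered 8-bit path does:

#include <stdint.h>

// E.g. scale = 0x4000 maps 10-bit input to 8 bits: (v * 0x4000) >> 16 == v >> 2.
static void ScaleRowDown2_16To8_Sketch(const uint16_t* src_ptr,
                                       uint8_t* dst_ptr, int dst_width,
                                       int scale) {
  for (int x = 0; x < dst_width; ++x) {
    uint32_t v = ((uint32_t)src_ptr[2 * x + 1] * (uint32_t)scale) >> 16;
    dst_ptr[x] = (uint8_t)(v > 255 ? 255 : v);  // clamp255
  }
}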
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
@@ -267,6 +320,11 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
+#if defined(HAS_SCALEROWDOWN4_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
+ }
+#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -427,6 +485,17 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
+#if defined(HAS_SCALEROWDOWN34_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_RVV;
+ ScaleRowDown34_1 = ScaleRowDown34_RVV;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
+ }
+ }
+#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -642,6 +711,17 @@ static void ScalePlaneDown38(int src_width,
}
}
#endif
+#if defined(HAS_SCALEROWDOWN38_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_RVV;
+ ScaleRowDown38_2 = ScaleRowDown38_RVV;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
+ }
+ }
+#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -775,9 +855,11 @@ static void ScaleAddCols2_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
- *dst_ptr++ =
- SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
- 16;
+ int scaletbl_index = boxwidth - minboxwidth;
+ assert((scaletbl_index == 0) || (scaletbl_index == 1));
+ *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
+ scaletbl[scaletbl_index] >>
+ 16);
}
}
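The two-entry scaletbl works because 16.16 fixed-point stepping yields box widths of only minboxwidth or minboxwidth + 1, which the new asserts make explicit. A sketch of the width computation:

// Each output pixel covers the source span [x >> 16, (x + dx) >> 16);
// MIN1 clamps the width to at least one source pixel.
static int BoxWidthAt(int x, int dx) {
  int ix = x >> 16;
  int w = ((x + dx) >> 16) - ix;
  return w < 1 ? 1 : w;
}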
@@ -797,9 +879,10 @@ static void ScaleAddCols2_16_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
- scaletbl[boxwidth - minboxwidth] >>
- 16;
+ int scaletbl_index = boxwidth - minboxwidth;
+ assert((scaletbl_index == 0) || (scaletbl_index == 1));
+ *dst_ptr++ =
+ SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] >> 16;
}
}
@@ -814,7 +897,7 @@ static void ScaleAddCols0_C(int dst_width,
(void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
- *dst_ptr++ = src_ptr[i] * scaleval >> 16;
+ *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
}
}
@@ -829,7 +912,7 @@ static void ScaleAddCols1_C(int dst_width,
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
- *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
+ *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}
@@ -856,14 +939,14 @@ static void ScaleAddCols1_16_C(int dst_width,
// one pixel of destination using fixed point (16.16) to step
 // through source, sampling a box of pixels with simple
// averaging.
-static void ScalePlaneBox(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+static int ScalePlaneBox(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -877,6 +960,8 @@ static void ScalePlaneBox(int src_width,
{
// Allocate a row buffer of uint16_t.
align_buffer_64(row16, src_width * 2);
+ if (!row16)
+ return 1;
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16_t* src_ptr, uint8_t* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C
@@ -923,6 +1008,11 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
+#if defined(HAS_SCALEADDROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleAddRow = ScaleAddRow_RVV;
+ }
+#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
@@ -943,16 +1033,17 @@ static void ScalePlaneBox(int src_width,
}
free_aligned_buffer_64(row16);
}
+ return 0;
}
-static void ScalePlaneBox_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static int ScalePlaneBox_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -966,6 +1057,8 @@ static void ScalePlaneBox_16(int src_width,
{
// Allocate a row buffer of uint32_t.
align_buffer_64(row32, src_width * 4);
+ if (!row32)
+ return 1;
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32_t* src_ptr, uint16_t* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
@@ -997,18 +1090,19 @@ static void ScalePlaneBox_16(int src_width,
}
free_aligned_buffer_64(row32);
}
+ return 0;
}
// Scale plane down with bilinear interpolation.
-void ScalePlaneBilinearDown(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- enum FilterMode filtering) {
+static int ScalePlaneBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -1017,13 +1111,15 @@ void ScalePlaneBilinearDown(int src_width,
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width);
+ if (!row)
+ return 1;
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1070,6 +1166,11 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@@ -1121,17 +1222,18 @@ void ScalePlaneBilinearDown(int src_width,
}
}
free_aligned_buffer_64(row);
+ return 0;
}
-void ScalePlaneBilinearDown_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- enum FilterMode filtering) {
+static int ScalePlaneBilinearDown_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -1140,13 +1242,15 @@ void ScalePlaneBilinearDown_16(int src_width,
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width * 2);
+ if (!row)
+ return 1;
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
- void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1212,18 +1316,19 @@ void ScalePlaneBilinearDown_16(int src_width,
}
}
free_aligned_buffer_64(row);
+ return 0;
}
 // Scale plane up with bilinear interpolation.
-void ScalePlaneBilinearUp(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- enum FilterMode filtering) {
+static int ScalePlaneBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1231,10 +1336,10 @@ void ScalePlaneBilinearUp(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1265,6 +1370,11 @@ void ScalePlaneBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_C;
@@ -1315,11 +1425,13 @@ void ScalePlaneBilinearUp(int src_width,
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1360,6 +1472,7 @@ void ScalePlaneBilinearUp(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
// Scale plane, horizontally up by 2 times.
@@ -1367,20 +1480,21 @@ void ScalePlaneBilinearUp(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of I422 to I444.
-void ScalePlaneUp2_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+static void ScalePlaneUp2_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
ScaleRowUp2_Linear_Any_C;
int i;
int y;
int dy;
+ (void)src_width;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
@@ -1407,6 +1521,11 @@ void ScalePlaneUp2_Linear(int src_width,
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
}
#endif
+#ifdef HAS_SCALEROWUP2_LINEAR_RVV
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowUp = ScaleRowUp2_Linear_RVV;
+ }
+#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
@@ -1426,19 +1545,20 @@ void ScalePlaneUp2_Linear(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of I420 to I444.
-void ScalePlaneUp2_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+static void ScalePlaneUp2_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_Any_C;
int x;
+ (void)src_width;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
@@ -1466,6 +1586,11 @@ void ScalePlaneUp2_Bilinear(int src_width,
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
}
#endif
+#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
+ if (TestCpuFlag(kCpuHasRVV)) {
+ Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
+ }
+#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
@@ -1486,20 +1611,21 @@ void ScalePlaneUp2_Bilinear(int src_width,
// its original width, using linear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
-void ScalePlaneUp2_12_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static void ScalePlaneUp2_12_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
int y;
int dy;
+ (void)src_width;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
@@ -1540,19 +1666,20 @@ void ScalePlaneUp2_12_Linear(int src_width,
// its original size, using bilinear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
-void ScalePlaneUp2_12_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static void ScalePlaneUp2_12_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
int x;
+ (void)src_width;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
@@ -1587,20 +1714,21 @@ void ScalePlaneUp2_12_Bilinear(int src_width,
}
}
-void ScalePlaneUp2_16_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static void ScalePlaneUp2_16_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
int y;
int dy;
+ (void)src_width;
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
@@ -1636,19 +1764,20 @@ void ScalePlaneUp2_16_Linear(int src_width,
}
}
-void ScalePlaneUp2_16_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static void ScalePlaneUp2_16_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
int x;
+ (void)src_width;
// This function can only scale up by 2 times.
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
@@ -1683,15 +1812,15 @@ void ScalePlaneUp2_16_Bilinear(int src_width,
}
}
-void ScalePlaneBilinearUp_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- enum FilterMode filtering) {
+static int ScalePlaneBilinearUp_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1699,10 +1828,10 @@ void ScalePlaneBilinearUp_16(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
- void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1766,12 +1895,13 @@ void ScalePlaneBilinearUp_16(int src_width,
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
-
- uint16_t* rowptr = (uint16_t*)row;
- int rowstride = kRowSize;
+ const int row_size = (dst_width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
+ int rowstride = row_size;
int lasty = yi;
+ uint16_t* rowptr = (uint16_t*)row;
+ if (!row)
+ return 1;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
if (src_height > 1) {
@@ -1811,6 +1941,7 @@ void ScalePlaneBilinearUp_16(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
// Scale Plane to/from any dimensions, without interpolation.
@@ -1827,7 +1958,7 @@ static void ScalePlaneSimple(int src_width,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
+ void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
int x, int dx) = ScaleCols_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1864,7 +1995,7 @@ static void ScalePlaneSimple_16(int src_width,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
+ void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
int x, int dx) = ScaleCols_16_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1895,15 +2026,15 @@ static void ScalePlaneSimple_16(int src_width,
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
-void ScalePlane(const uint8_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint8_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering) {
+int ScalePlane(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@@ -1919,7 +2050,7 @@ void ScalePlane(const uint8_t* src,
if (dst_width == src_width && dst_height == src_height) {
// Straight copy.
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
- return;
+ return 0;
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = 0;
@@ -1935,7 +2066,7 @@ void ScalePlane(const uint8_t* src,
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
- return;
+ return 0;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
// Scale down.
@@ -1943,69 +2074,67 @@ void ScalePlane(const uint8_t* src,
// optimized, 3/4
ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
- return;
+ return 0;
}
if (2 * dst_width == src_width && 2 * dst_height == src_height) {
// optimized, 1/2
ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
- return;
+ return 0;
}
// 3/8 rounded up for odd sized chroma height.
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
// optimized, 3/8
ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
- return;
+ return 0;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
(filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, filtering);
- return;
+ return 0;
}
}
if (filtering == kFilterBox && dst_height * 2 < src_height) {
- ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
- dst_stride, src, dst);
- return;
+ return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst);
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
if (filtering && dst_height > src_height) {
- ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
+ return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
}
if (filtering) {
- ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
+ return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
}
ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst);
+ return 0;
}
LIBYUV_API
-void ScalePlane_16(const uint16_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint16_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering) {
+int ScalePlane_16(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@@ -2021,7 +2150,7 @@ void ScalePlane_16(const uint16_t* src,
if (dst_width == src_width && dst_height == src_height) {
// Straight copy.
CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
- return;
+ return 0;
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = 0;
@@ -2040,7 +2169,7 @@ void ScalePlane_16(const uint16_t* src,
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
- return;
+ return 0;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
// Scale down.
@@ -2048,69 +2177,68 @@ void ScalePlane_16(const uint16_t* src,
// optimized, 3/4
ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
- return;
+ return 0;
}
if (2 * dst_width == src_width && 2 * dst_height == src_height) {
// optimized, 1/2
ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
- return;
+ return 0;
}
// 3/8 rounded up for odd sized chroma height.
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
// optimized, 3/8
ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
- return;
+ return 0;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
(filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
- return;
+ return 0;
}
}
if (filtering == kFilterBox && dst_height * 2 < src_height) {
- ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
- dst_stride, src, dst);
- return;
+ return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst);
}
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
if (filtering && dst_height > src_height) {
- ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
+ return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
+ src_stride, dst_stride, src, dst, filtering);
}
if (filtering) {
- ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
+ return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
+ dst_height, src_stride, dst_stride, src,
+ dst, filtering);
}
ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst);
+ return 0;
}
LIBYUV_API
-void ScalePlane_12(const uint16_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint16_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- enum FilterMode filtering) {
+int ScalePlane_12(const uint16_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint16_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@@ -2125,17 +2253,17 @@ void ScalePlane_12(const uint16_t* src,
if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
- ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
- dst_width, dst_height, filtering);
+ return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
+ dst_width, dst_height, filtering);
}
// Scale an I420 image.
@@ -2163,6 +2291,7 @@ int I420Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2170,13 +2299,19 @@ int I420Scale(const uint8_t* src_y,
return -1;
}
- ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
- dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
- ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
- dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
- return 0;
+ r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+ dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+ dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+ return r;
}
LIBYUV_API
@@ -2201,6 +2336,7 @@ int I420Scale_16(const uint16_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2208,13 +2344,19 @@ int I420Scale_16(const uint16_t* src_y,
return -1;
}
- ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
- dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
- ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
- dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
- return 0;
+ r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+ dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+ dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+ return r;
}
LIBYUV_API
@@ -2239,6 +2381,7 @@ int I420Scale_12(const uint16_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2246,13 +2389,19 @@ int I420Scale_12(const uint16_t* src_y,
return -1;
}
- ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
- dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
- ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
- dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
- return 0;
+ r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
+ dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
+ dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
+ return r;
}
// Scale an I444 image.
@@ -2276,19 +2425,27 @@ int I444Scale(const uint8_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
+ int r;
+
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
- ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
- dst_width, dst_height, filtering);
- ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
- dst_width, dst_height, filtering);
- return 0;
+ r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
+ dst_stride_u, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
+ dst_stride_v, dst_width, dst_height, filtering);
+ return r;
}
LIBYUV_API
@@ -2309,19 +2466,27 @@ int I444Scale_16(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
+ int r;
+
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
- ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
- dst_width, dst_height, filtering);
- ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
- dst_width, dst_height, filtering);
- return 0;
+ r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
+ dst_stride_u, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
+ dst_stride_v, dst_width, dst_height, filtering);
+ return r;
}
LIBYUV_API
@@ -2342,19 +2507,27 @@ int I444Scale_12(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
+ int r;
+
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
- ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
- dst_width, dst_height, filtering);
- ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
- dst_width, dst_height, filtering);
- return 0;
+ r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
+ dst_stride_u, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
+ dst_stride_v, dst_width, dst_height, filtering);
+ return r;
}
// Scale an I422 image.
@@ -2380,6 +2553,7 @@ int I422Scale(const uint8_t* src_y,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2387,13 +2561,19 @@ int I422Scale(const uint8_t* src_y,
return -1;
}
- ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
- dst_stride_u, dst_halfwidth, dst_height, filtering);
- ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
- dst_stride_v, dst_halfwidth, dst_height, filtering);
- return 0;
+ r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+ dst_stride_u, dst_halfwidth, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+ dst_stride_v, dst_halfwidth, dst_height, filtering);
+ return r;
}
LIBYUV_API
@@ -2416,6 +2596,7 @@ int I422Scale_16(const uint16_t* src_y,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2423,13 +2604,19 @@ int I422Scale_16(const uint16_t* src_y,
return -1;
}
- ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
- dst_stride_u, dst_halfwidth, dst_height, filtering);
- ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
- dst_stride_v, dst_halfwidth, dst_height, filtering);
- return 0;
+ r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+ dst_stride_u, dst_halfwidth, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+ dst_stride_v, dst_halfwidth, dst_height, filtering);
+ return r;
}
LIBYUV_API
@@ -2452,6 +2639,7 @@ int I422Scale_12(const uint16_t* src_y,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+ int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
@@ -2459,13 +2647,19 @@ int I422Scale_12(const uint16_t* src_y,
return -1;
}
- ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
- dst_stride_u, dst_halfwidth, dst_height, filtering);
- ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
- dst_stride_v, dst_halfwidth, dst_height, filtering);
- return 0;
+ r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
+ dst_stride_u, dst_halfwidth, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
+ dst_stride_v, dst_halfwidth, dst_height, filtering);
+ return r;
}
// Scale an NV12 image.
@@ -2489,6 +2683,7 @@ int NV12Scale(const uint8_t* src_y,
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+ int r;
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
@@ -2496,11 +2691,14 @@ int NV12Scale(const uint8_t* src_y,
return -1;
}
- ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
- dst_width, dst_height, filtering);
- UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
- dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
- return 0;
+ r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
+ dst_stride_y, dst_width, dst_height, filtering);
+ if (r != 0) {
+ return r;
+ }
+ r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
+ dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
+ return r;
}
// Deprecated api
diff --git a/files/source/scale_any.cc b/source/scale_any.cc
index 317041f8..f6576874 100644
--- a/files/source/scale_any.cc
+++ b/source/scale_any.cc
@@ -128,6 +128,22 @@ SDODD(ScaleRowDown2Box_Odd_NEON,
1,
15)
#endif
+#ifdef HAS_SCALEUVROWDOWN2_NEON
+SDANY(ScaleUVRowDown2_Any_NEON,
+ ScaleUVRowDown2_NEON,
+ ScaleUVRowDown2_C,
+ 2,
+ 2,
+ 7)
+#endif
+#ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON
+SDANY(ScaleUVRowDown2Linear_Any_NEON,
+ ScaleUVRowDown2Linear_NEON,
+ ScaleUVRowDown2Linear_C,
+ 2,
+ 2,
+ 7)
+#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_NEON
SDANY(ScaleUVRowDown2Box_Any_NEON,
ScaleUVRowDown2Box_NEON,
diff --git a/files/source/scale_argb.cc b/source/scale_argb.cc
index 9c3acf7f..18bdeb86 100644
--- a/files/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -16,6 +16,7 @@
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
+#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
@@ -58,9 +59,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
- src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@@ -127,6 +128,15 @@ static void ScaleARGBDown2(int src_width,
}
}
#endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleARGBRowDown2 =
+ filtering == kFilterNone
+ ? ScaleARGBRowDown2_RVV
+ : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_RVV
+ : ScaleARGBRowDown2Box_RVV);
+ }
+#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -141,28 +151,33 @@ static void ScaleARGBDown2(int src_width,
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down an ARGB image to 1/4 of
// its original size.
-static void ScaleARGBDown4Box(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_argb,
- uint8_t* dst_argb,
- int x,
- int dx,
- int y,
- int dy) {
+static int ScaleARGBDown4Box(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy) {
int j;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 * 4 + 31) & ~31;
+  // TODO(fbarchard): Remove this row buffer and implement ScaleARGBRowDown4
+  // as a 2-pass wrapper that uses a very small array on the stack with a
+  // horizontal loop.
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@@ -184,16 +199,22 @@ static void ScaleARGBDown4Box(int src_width,
}
}
#endif
+#if defined(HAS_SCALEARGBROWDOWN2_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV;
+ }
+#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
- ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
+ ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
- ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
+ ScaleARGBRowDown2(row, row_size, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
free_aligned_buffer_64(row);
+ return 0;
}
// ScaleARGB ARGB Even
@@ -214,7 +235,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
- int row_stride = (dy >> 16) * (int64_t)src_stride;
+ ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@@ -222,7 +243,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@@ -263,6 +284,16 @@ static void ScaleARGBDownEven(int src_width,
}
}
#endif
+#if defined(HAS_SCALEARGBROWDOWNEVENBOX_RVV)
+ if (filtering && TestCpuFlag(kCpuHasRVV)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEvenBox_RVV;
+ }
+#endif
+#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
+ if (!filtering && TestCpuFlag(kCpuHasRVV)) {
+ ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
+ }
+#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -275,24 +306,24 @@ static void ScaleARGBDownEven(int src_width,
}
// Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_argb,
- uint8_t* dst_argb,
- int x,
- int dx,
- int y,
- int dy,
- enum FilterMode filtering) {
+static int ScaleARGBBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@@ -348,6 +379,11 @@ static void ScaleARGBBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
@@ -381,6 +417,8 @@ static void ScaleARGBBilinearDown(int src_width,
// Allocate a row of ARGB.
{
align_buffer_64(row, clip_src_width * 4);
+ if (!row)
+ return 1;
const int max_y = (src_height - 1) << 16;
if (y > max_y) {
@@ -388,7 +426,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+ const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@@ -404,27 +442,28 @@ static void ScaleARGBBilinearDown(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
// Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_argb,
- uint8_t* dst_argb,
- int x,
- int dx,
- int y,
- int dy,
- enum FilterMode filtering) {
+static int ScaleARGBBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
@@ -468,6 +507,11 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (src_width >= 32768) {
ScaleARGBFilterCols =
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
@@ -545,14 +589,16 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
- const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+ const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -570,7 +616,7 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
- src = src_argb + yi * (int64_t)src_stride;
+ src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -593,27 +639,28 @@ static void ScaleARGBBilinearUp(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_argb,
- int x,
- int dx,
- int y,
- int dy,
- enum FilterMode filtering) {
+static int ScaleYUVToARGBBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride_y,
+ int src_stride_u,
+ int src_stride_v,
+ int dst_stride_argb,
+ const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
int j;
void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
@@ -659,6 +706,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(src_width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -667,8 +722,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
@@ -711,8 +771,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
@@ -793,20 +858,21 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
- const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
- const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
- const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+ const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
+ const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+ const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-
- // Allocate 1 row of ARGB for source conversion.
- align_buffer_64(argb_row, src_width * 4);
+ // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
+ // scaled horizontally to the destination width.
+ const int row_size = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2 + src_width * 4);
+ uint8_t* argb_row = row + row_size * 2;
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
+ if (!row)
+ return 1;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
@@ -833,9 +899,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
- src_row_y = src_y + yi * (int64_t)src_stride_y;
- src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
- src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+ src_row_y = src_y + yi * (intptr_t)src_stride_y;
+ src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+ src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@@ -861,7 +927,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y += dy;
}
free_aligned_buffer_64(row);
- free_aligned_buffer_64(row_argb);
+ return 0;
}
#endif
@@ -883,7 +949,7 @@ static void ScaleARGBSimple(int src_width,
int y,
int dy) {
int j;
- void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
(void)src_height;
@@ -926,7 +992,7 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
- ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
+ ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
@@ -936,19 +1002,19 @@ static void ScaleARGBSimple(int src_width,
// ScaleARGB: scale an ARGB image.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint8_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- int clip_x,
- int clip_y,
- int clip_width,
- int clip_height,
- enum FilterMode filtering) {
+static int ScaleARGB(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -962,7 +1028,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src = src + (src_height - 1) * (int64_t)src_stride;
+ src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -977,7 +1043,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
- src += (clipf >> 16) * (int64_t)src_stride;
+ src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@@ -993,27 +1059,27 @@ static void ScaleARGB(const uint8_t* src,
ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, x, dx, y, dy,
filtering);
- return;
+ return 0;
}
if (dx == 0x40000 && filtering == kFilterBox) {
// Optimized 1/4 box downsample.
- ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy);
- return;
+ return ScaleARGBDown4Box(src_width, src_height, clip_width,
+ clip_height, src_stride, dst_stride, src,
+ dst, x, dx, y, dy);
}
ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, x, dx, y, dy,
filtering);
- return;
+ return 0;
}
// Optimized odd scale down, e.g. 3, 5, 7, 9x.
if ((dx & 0x10000) && (dy & 0x10000)) {
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
- ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
+ ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
- return;
+ return 0;
}
}
}
@@ -1022,22 +1088,21 @@ static void ScaleARGB(const uint8_t* src,
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, y, dy, /*bpp=*/4, filtering);
- return;
+ return 0;
}
if (filtering && dy < 65536) {
- ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy,
- filtering);
- return;
+ return ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
}
if (filtering) {
- ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy,
- filtering);
- return;
+ return ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
}
ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, dx, y, dy);
+ return 0;
}
LIBYUV_API
@@ -1061,10 +1126,9 @@ int ARGBScaleClip(const uint8_t* src_argb,
(clip_y + clip_height) > dst_height) {
return -1;
}
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
- dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
- clip_height, filtering);
- return 0;
+ return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+ dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
+ clip_width, clip_height, filtering);
}
// Scale an ARGB image.
@@ -1082,10 +1146,9 @@ int ARGBScale(const uint8_t* src_argb,
src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1;
}
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
- dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
- filtering);
- return 0;
+ return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
+ dst_stride_argb, dst_width, dst_height, 0, 0, dst_width,
+ dst_height, filtering);
}
// Scale with YUV conversion to ARGB and clipping.
@@ -1109,8 +1172,11 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
int clip_width,
int clip_height,
enum FilterMode filtering) {
- uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
int r;
+ uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
+ if (!argb_buffer) {
+ return 1; // Out of memory runtime error.
+ }
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
(void)dst_fourcc;
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
diff --git a/files/source/scale_common.cc b/source/scale_common.cc
index b02bdafd..d07a39af 100644
--- a/files/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -23,6 +23,25 @@ namespace libyuv {
extern "C" {
#endif
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
+// TODO(fbarchard): make clamp255 preserve negative values.
+static __inline int32_t clamp255(int32_t v) {
+ return (-(v >= 255) | v) & 255;
+}
+
+// Use scale to convert lsb formats to msb, depending on how many bits there
+// are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
+#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
+
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
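
The C16TO8 mapping above is easy to sanity-check by hand: for a 10-bit source
the table gives scale = 16384, so (1023 * 16384) >> 16 = 255 and
(512 * 16384) >> 16 = 128, i.e. full range maps to full range. clamp255 is a
branchless upper clamp: when v >= 255 the comparison yields 1, -(1) is all
ones, and OR-ing saturates the value before the final mask (negative inputs
are not preserved, hence the TODO). A standalone sketch of both:

    #include <stdint.h>

    /* Branchless upper clamp; mirrors clamp255 above. */
    static int32_t clamp255_demo(int32_t v) {
      return (-(v >= 255) | v) & 255;
    }

    /* C16TO8(v, scale): e.g. scale = 16384 converts 10-bit to 8-bit. */
    static uint8_t c16to8_demo(int32_t v, int32_t scale) {
      return (uint8_t)clamp255_demo((v * scale) >> 16);
    }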
@@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ }
+}
+
+void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst += 1;
+ src_ptr += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
+}
+
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ }
+}
+
+void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst += 1;
+ s += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
+}
+
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst[1] = STATIC_CAST(uint8_t,
+ C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ }
+}
+
+void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst[1] = STATIC_CAST(uint8_t,
+ C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst += 1;
+ s += 2;
+ t += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
+}
+
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -1116,18 +1280,13 @@ void ScaleUVRowDown2_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width) {
- const uint16_t* src = (const uint16_t*)(src_uv);
- uint16_t* dst = (uint16_t*)(dst_uv);
int x;
(void)src_stride;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[1];
- dst[1] = src[3];
- src += 2;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[1];
+ for (x = 0; x < dst_width; ++x) {
+ dst_uv[0] = src_uv[2]; // Store the 2nd UV
+ dst_uv[1] = src_uv[3];
+ src_uv += 4;
+ dst_uv += 2;
}
}
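
The rewrite above also fixes the stride: the removed loop read src[1] and
src[3] (as 16-bit UV pairs) but advanced src by only 2 pairs per iteration, so
consecutive iterations overlapped and every other output pair was duplicated.
The byte-wise loop consumes 4 bytes (2 UV pairs) per output pair, restoring
the intended 2:1 point sampling.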
@@ -1469,7 +1628,7 @@ void ScalePlaneVertical(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1519,6 +1678,12 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
+
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
@@ -1548,7 +1713,7 @@ void ScalePlaneVertical_16(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
- void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
+ void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1627,7 +1792,7 @@ void ScalePlaneVertical_16To8(int src_height,
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
// TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
- void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
+ void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int scale, int dst_width,
int source_y_fraction) = InterpolateRow_16To8_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1799,35 +1964,6 @@ void ScaleSlope(int src_width,
}
#undef CENTERSTART
-// Read 8x2 upsample with filtering and write 16x1.
-// actually reads an extra pixel, so 9x2.
-void ScaleRowUp2_16_C(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* src2 = src_ptr + src_stride;
-
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- uint16_t p0 = src_ptr[0];
- uint16_t p1 = src_ptr[1];
- uint16_t p2 = src2[0];
- uint16_t p3 = src2[1];
- dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
- dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
- ++src_ptr;
- ++src2;
- dst += 2;
- }
- if (dst_width & 1) {
- uint16_t p0 = src_ptr[0];
- uint16_t p1 = src_ptr[1];
- uint16_t p2 = src2[0];
- uint16_t p3 = src2[1];
- dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
- }
-}
-
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/scale_gcc.cc b/source/scale_gcc.cc
index edaf2e29..17eeffad 100644
--- a/files/source/scale_gcc.cc
+++ b/source/scale_gcc.cc
@@ -1094,7 +1094,8 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)), // %4
"m"(kLinearShuffleFar) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif
@@ -1294,7 +1295,7 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "m"(kLinearMadd31) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif
diff --git a/files/source/scale_lsx.cc b/source/scale_lsx.cc
index bfe5e9fb..bfe5e9fb 100644
--- a/files/source/scale_lsx.cc
+++ b/source/scale_lsx.cc
diff --git a/files/source/scale_msa.cc b/source/scale_msa.cc
index 482a521f..482a521f 100644
--- a/files/source/scale_msa.cc
+++ b/source/scale_msa.cc
diff --git a/files/source/scale_neon.cc b/source/scale_neon.cc
index 6a0d6e1b..ccc75106 100644
--- a/files/source/scale_neon.cc
+++ b/source/scale_neon.cc
@@ -1428,6 +1428,45 @@ void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
#undef LOAD2_DATA32_LANE
+void ScaleUVRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels.
+ "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst1.16 {q1}, [%1]! \n" // store 8 UV
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1");
+}
+
+void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels.
+ "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vrhadd.u8 q0, q0, q1 \n" // rounding half add
+ "vst1.16 {q0}, [%1]! \n" // store 8 UV
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1");
+}
+
void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/files/source/scale_neon64.cc b/source/scale_neon64.cc
index 9f9636e6..7c072380 100644
--- a/files/source/scale_neon64.cc
+++ b/source/scale_neon64.cc
@@ -1118,101 +1118,6 @@ void ScaleFilterCols_NEON(uint8_t* dst_ptr,
#undef LOAD2_DATA8_LANE
-// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8_t* dst_ptr,
- const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- int dst_width,
- int source_y_fraction) {
- int y_fraction = 256 - source_y_fraction;
- asm volatile(
- "cmp %w4, #0 \n"
- "b.eq 100f \n"
- "add %2, %2, %1 \n"
- "cmp %w4, #64 \n"
- "b.eq 75f \n"
- "cmp %w4, #128 \n"
- "b.eq 50f \n"
- "cmp %w4, #192 \n"
- "b.eq 25f \n"
-
- "dup v5.8b, %w4 \n"
- "dup v4.8b, %w5 \n"
- // General purpose row blend.
- "1: \n"
- "ld1 {v0.16b}, [%1], #16 \n"
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "umull v6.8h, v0.8b, v4.8b \n"
- "umull2 v7.8h, v0.16b, v4.16b \n"
- "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
- "umlal v6.8h, v1.8b, v5.8b \n"
- "umlal2 v7.8h, v1.16b, v5.16b \n"
- "prfm pldl1keep, [%2, 448] \n"
- "rshrn v0.8b, v6.8h, #8 \n"
- "rshrn2 v0.16b, v7.8h, #8 \n"
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- "ld1 {v0.16b}, [%1], #16 \n"
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "prfm pldl1keep, [%2, 448] \n"
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- "ld1 {v0.16b}, [%1], #16 \n"
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "prfm pldl1keep, [%2, 448] \n"
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- "ld1 {v1.16b}, [%1], #16 \n"
- "ld1 {v0.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "prfm pldl1keep, [%2, 448] \n"
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- "ld1 {v0.16b}, [%1], #16 \n"
- "subs %w3, %w3, #16 \n"
- "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 100b \n"
-
- "99: \n"
- "st1 {v0.b}[15], [%0] \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction), // %4
- "+r"(y_fraction) // %5
- :
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc");
-}
-
void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -1568,6 +1473,45 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
);
}
+void ScaleUVRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
+ "st1 {v1.8h}, [%1], #16 \n" // store 8 UV
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1");
+}
+
+void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
+ "st1 {v0.8h}, [%1], #16 \n" // store 8 UV
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1");
+}
+
void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/files/source/scale_rgb.cc b/source/scale_rgb.cc
index 8db59b56..8db59b56 100644
--- a/files/source/scale_rgb.cc
+++ b/source/scale_rgb.cc
diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc
new file mode 100644
index 00000000..de037e45
--- /dev/null
+++ b/source/scale_rvv.cc
@@ -0,0 +1,1040 @@
+/*
+ * Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Copyright (c) 2023 SiFive, Inc. All rights reserved.
+ *
+ * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
+ * Contributed by Bruce Lai <bruce.lai@sifive.com>
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+// This module is for clang RVV. GCC does not yet support segment load & store.
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
+ defined(__clang__)
+#include <assert.h>
+#include <riscv_vector.h>
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#ifdef HAS_SCALEADDROW_RVV
+void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
+ size_t w = (size_t)src_width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl);
+ vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl);
+ // Use widening multiply-add instead of widening + add
+ v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl);
+ __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl);
+ w -= vl;
+ src_ptr += vl;
+ dst_ptr += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWN2_RVV
+void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ size_t w = (size_t)dst_width;
+ const uint64_t* src = (const uint64_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
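+  // Each 64-bit lane holds two adjacent ARGB pixels; the narrowing shift by 32
+  // keeps the high word, i.e. the second (odd) pixel of each pair on the
+  // little-endian targets this file supports.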
+ do {
+ size_t vl = __riscv_vsetvl_e64m8(w);
+ vuint64m8_t v_data = __riscv_vle64_v_u64m8(src, vl);
+ vuint32m4_t v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl);
+ __riscv_vse32_v_u32m4(dst, v_dst, vl);
+ w -= vl;
+ src += vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV
+void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ (void)src_stride;
+ size_t w = (size_t)dst_width;
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m4_t v_odd, v_even, v_dst;
+ vuint32m4_t v_odd_32, v_even_32;
+ size_t vl = __riscv_vsetvl_e32m4(w);
+ __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl);
+ v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32);
+ v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32);
+ // Use round-to-nearest-up mode for averaging add
+ v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4);
+ __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+ w -= vl;
+ src += vl * 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
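+
+// For reference: with vxrm set to round-to-nearest-up, __riscv_vaaddu computes
+// (a + b + 1) >> 1 per element, the same rounding as SSE2 pavgb and NEON
+// vrhadd. A scalar model (illustration only):
+//
+//   static uint8_t vaaddu_rnu_demo(uint8_t a, uint8_t b) {
+//     // e.g. a = 10, b = 11: (10 + 11 + 1) >> 1 = 11, not 10.
+//     return (uint8_t)(((uint16_t)a + b + 1) >> 1);
+//   }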
+
+#ifdef HAS_SCALEARGBROWDOWN2BOX_RVV
+void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ uint8_t* dst_argb,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint32_t* src0 = (const uint32_t*)(src_argb);
+ const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst;
+ vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16;
+ vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32;
+ size_t vl = __riscv_vsetvl_e32m4(w);
+ __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl);
+ __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl);
+ v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32);
+ v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32);
+ v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32);
+ v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32);
+ v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4);
+ v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4);
+ v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
+ // Use round-to-nearest-up mode for vnclip
+ v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4);
+ __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+ w -= vl;
+ src0 += vl * 2;
+ src1 += vl * 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_RVV
+void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint32_t* src = (const uint32_t*)(src_argb);
+ uint32_t* dst = (uint32_t*)(dst_argb);
+ const int stride_byte = src_stepx * 4;
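+  // Strided load: gather one 32-bit ARGB pixel every src_stepx pixels.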
+ do {
+ size_t vl = __riscv_vsetvl_e32m8(w);
+ vuint32m8_t v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl);
+ __riscv_vse32_v_u32m8(dst, v_row, vl);
+ w -= vl;
+ src += vl * src_stepx;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV
+void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_argb,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint32_t* src0 = (const uint32_t*)(src_argb);
+ const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
+ const int stride_byte = src_stepx * 4;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst;
+ vuint16m8_t v_row0_sum, v_row1_sum, v_sum;
+ vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32;
+ size_t vl = __riscv_vsetvl_e32m4(w);
+ __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0,
+ stride_byte, vl);
+ __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1,
+ stride_byte, vl);
+ v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32);
+ v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32);
+ v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32);
+ v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32);
+ v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4);
+ v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4);
+ v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
+ // Use round-to-nearest-up mode for vnclip
+ v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4);
+ __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
+ w -= vl;
+ src0 += vl * src_stepx;
+ src1 += vl * src_stepx;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN2_RVV
+void ScaleRowDown2_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint16_t* src = (const uint16_t*)src_ptr;
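+  // Each 16-bit lane holds two adjacent source bytes; the narrowing shift by 8
+  // keeps the high (odd) byte on little-endian targets.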
+ (void)src_stride;
+ do {
+ size_t vl = __riscv_vsetvl_e16m8(w);
+ vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl);
+ vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl);
+ __riscv_vse8_v_u8m4(dst, v_dst, vl);
+ w -= vl;
+ src += vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN2LINEAR_RVV
+void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ (void)src_stride;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m4_t v_s0, v_s1, v_dst;
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl);
+ // Use round-to-nearest-up mode for averaging add
+ v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl);
+ __riscv_vse8_v_u8m4(dst, v_dst, vl);
+ w -= vl;
+ src_ptr += 2 * vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN2BOX_RVV
+void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ size_t w = (size_t)dst_width;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ vuint8m4_t v_s0, v_s1, v_t0, v_t1;
+ vuint16m8_t v_s01, v_t01, v_st01;
+ vuint8m4_t v_dst;
+ __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl);
+ __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl);
+ v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl);
+ v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl);
+ v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl);
+ // Use round-to-nearest-up mode for vnclip
+ v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl);
+ __riscv_vse8_v_u8m4(dst, v_dst, vl);
+ w -= vl;
+ s += 2 * vl;
+ t += 2 * vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN4_RVV
+void ScaleRowDown4_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ (void)src_stride;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+ __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
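+    // Point sample: keep pixel 2 of every 4 source pixels.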
+ __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl);
+ w -= vl;
+ src_ptr += (4 * vl);
+ dst_ptr += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN4BOX_RVV
+void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
+ const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
+ size_t w = (size_t)dst_width;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+ vuint8m2_t v_t0, v_t1, v_t2, v_t3;
+ vuint8m2_t v_u0, v_u1, v_u2, v_u3;
+ vuint8m2_t v_v0, v_v1, v_v2, v_v3;
+ vuint16m4_t v_s01, v_s23, v_t01, v_t23;
+ vuint16m4_t v_u01, v_u23, v_v01, v_v23;
+ vuint16m4_t v_st01, v_st23, v_uv01, v_uv23;
+ vuint16m4_t v_st0123, v_uv0123, v_stuv0123;
+ vuint8m2_t v_dst;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+
+ __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
+ v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl);
+
+ __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl);
+ v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl);
+
+ __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl);
+ v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl);
+ v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl);
+
+ v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl);
+ v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl);
+ v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl);
+ v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl);
+
+ __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl);
+
+ v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl);
+ v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl);
+
+ v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl);
+ v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl);
+
+ v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl);
+ v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl);
+ v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl);
+ // Use round-to-nearest-up mode for vnclip
+ v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl);
+ __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl);
+ w -= vl;
+ src_ptr += 4 * vl;
+ src_ptr1 += 4 * vl;
+ src_ptr2 += 4 * vl;
+ src_ptr3 += 4 * vl;
+ dst_ptr += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_RVV
+void ScaleRowDown34_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+ __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
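+    // 3/4 point sample: keep pixels 0, 1 and 3 of every 4 (drop pixel 2).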
+ __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl);
+ w -= vl;
+ src_ptr += 4 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_0_BOX_RVV
+void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+ vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16;
+ vuint8m2_t v_u0, v_u1, v_u2, v_u3;
+ vuint16m4_t v_u1_u16;
+ vuint8m2_t v_a0, v_a1, v_a2;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
+
+ if (src_stride == 0) {
+ v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
+ v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
+ v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 2, vl);
+ v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 2, vl);
+ } else {
+ vuint8m2_t v_t0, v_t1, v_t2, v_t3;
+ __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
+ v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl);
+ v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl);
+ v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl);
+ v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl);
+ t += 4 * vl;
+ }
+
+ v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl);
+ v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl);
+ v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl);
+ v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl);
+
+ // Use round-to-nearest-up mode for vnclip & averaging add
+ v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl);
+ v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl);
+ v_u2 = __riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl);
+ v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl);
+
+    // a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2
+ v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl);
+ v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl);
+ v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    // a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1
+ v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl);
+
+    // a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2
+ v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl);
+ v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl);
+ v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+ __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
+
+ w -= vl;
+ s += 4 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN34_1_BOX_RVV
+void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ const uint8_t* s = src_ptr;
+ const uint8_t* t = src_ptr + src_stride;
+ // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m2_t v_s0, v_s1, v_s2, v_s3;
+ vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3;
+ vuint16m4_t v_u1_u16;
+ vuint8m2_t v_a0, v_a1, v_a2;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
+
+ // Use round-to-nearest-up mode for vnclip & averaging add
+ if (src_stride == 0) {
+ v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl);
+ v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl);
+ v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl);
+ v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl);
+ } else {
+ vuint8m2_t v_t0, v_t1, v_t2, v_t3;
+ __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
+ v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl);
+ v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl);
+ v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl);
+ v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl);
+ t += 4 * vl;
+ }
+    // a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2
+ v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl);
+ v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl);
+ v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+    // a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1
+ v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl);
+
+    // a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2
+ v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl);
+ v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl);
+ v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
+
+ __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
+
+ w -= vl;
+ s += 4 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN38_RVV
+void ScaleRowDown38_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ (void)src_stride;
+ assert(dst_width % 3 == 0);
+ do {
+ vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+ size_t vl = __riscv_vsetvl_e8m1(w);
+ __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+ &v_s7, src_ptr, vl);
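+    // 3/8 point sample: keep pixels 0, 3 and 6 of every 8.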
+ __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl);
+ w -= vl;
+ src_ptr += 8 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEROWDOWN38_2_BOX_RVV
+void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ const uint16_t coeff_a = (65536u / 6u);
+ const uint16_t coeff_b = (65536u / 4u);
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ do {
+ vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+ vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
+ vuint16m2_t v_e0, v_e1, v_e2, v_e;
+ vuint16m2_t v_f0, v_f1, v_f2, v_f;
+ vuint16m2_t v_g0, v_g1, v_g;
+ vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
+ size_t vl = __riscv_vsetvl_e8m1(w);
+ // s: e00, e10, e20, f00, f10, f20, g00, g10
+ // t: e01, e11, e21, f01, f11, f21, g01, g11
+ __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+ &v_s7, src_ptr, vl);
+ __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
+ &v_t7, src_ptr + src_stride, vl);
+ // Calculate sum of [e00, e21] to v_e
+ // Calculate sum of [f00, f21] to v_f
+ // Calculate sum of [g00, g11] to v_g
+ v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
+ v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
+ v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
+ v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
+ v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
+ v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
+ v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
+ v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
+
+ v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
+ v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
+ v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
+ v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
+ v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
+
+ // Average in 16-bit fixed-point
+ v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
+ v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
+ v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
+
+ v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
+ v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
+ v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
+
+ __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
+ w -= vl;
+ src_ptr += 8 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
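+
+// For reference: multiplying by coeff_a = floor(65536 / 6) = 10922 and keeping
+// the high 16 bits (vmulhu) approximates division by 6 without an integer
+// divide. Because 10922 * 6 = 65532 < 65536 the result can be one low near the
+// top of the range:
+//   sum = 600 : (600 * 10922) >> 16 = 99   (exact: 100)
+//   sum = 1530: (1530 * 10922) >> 16 = 254 (exact: 255)
+// coeff_b = 65536 / 4 = 16384 is exact: (sum * 16384) >> 16 == sum >> 2.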
+
+#ifdef HAS_SCALEROWDOWN38_3_BOX_RVV
+void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t w = (size_t)dst_width / 3u;
+ const uint16_t coeff_a = (65536u / 9u);
+ const uint16_t coeff_b = (65536u / 6u);
+ assert((dst_width % 3 == 0) && (dst_width > 0));
+ do {
+ vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
+ vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
+ vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7;
+ vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e;
+ vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f;
+ vuint16m2_t v_g0, v_g1, v_g2, v_g;
+ vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
+ size_t vl = __riscv_vsetvl_e8m1(w);
+ // s: e00, e10, e20, f00, f10, f20, g00, g10
+ // t: e01, e11, e21, f01, f11, f21, g01, g11
+ // u: e02, e12, e22, f02, f12, f22, g02, g12
+ __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
+ &v_s7, src_ptr, vl);
+ __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
+ &v_t7, src_ptr + src_stride, vl);
+ __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6,
+ &v_u7, src_ptr + 2 * src_stride, vl);
+ // Calculate sum of [e00, e22]
+ v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
+ v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
+ v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
+ v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl);
+ v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl);
+
+ v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
+ v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl);
+ v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl);
+ v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
+ // Calculate sum of [f00, f22]
+ v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
+ v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
+ v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
+ v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl);
+ v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl);
+
+ v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
+ v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl);
+ v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl);
+ v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
+ // Calculate sum of [g00, g12]
+ v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
+ v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
+ v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl);
+
+ v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
+ v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl);
+
+ // Average in 16-bit fixed-point
+ v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
+ v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
+ v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
+
+ v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
+ v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
+ v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
+ __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
+ w -= vl;
+ src_ptr += 8 * vl;
+ dst_ptr += 3 * vl;
+ } while (w > 0);
+}
+#endif
+
+// The ScaleRowUp2_(Bi)linear_RVV functions are equivalent to other platforms'
+// ScaleRowUp2_(Bi)linear_Any_XXX: they process the entire row, whereas other
+// platforms implement only the non-edge part of the image and handle the edges
+// with scalar code.
+
+#ifdef HAS_SCALEROWUP2_LINEAR_RVV
+void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t work_width = (size_t)dst_width - 1u;
+ size_t src_width = work_width >> 1u;
+ const uint8_t* work_src_ptr = src_ptr;
+ uint8_t* work_dst_ptr = dst_ptr + 1;
+ size_t vl = __riscv_vsetvlmax_e8m4();
+ vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl);
+ dst_ptr[0] = src_ptr[0];
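+  // Each interior output pair blends neighbors with 3:1 weights:
+  //   dst[2*i + 1] = (3 * src[i] + src[i + 1] + 2) >> 2
+  //   dst[2*i + 2] = (src[i] + 3 * src[i + 1] + 2) >> 2
+  // The loop builds this as vwaddu (the +2 bias), vwmaccu (the 3x weight),
+  // then a narrowing shift right by 2.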
+ while (src_width > 0) {
+ vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even;
+ vuint16m8_t v_src0_u16, v_src1_u16;
+ size_t vl = __riscv_vsetvl_e8m4(src_width);
+ v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
+ v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl);
+
+ v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl);
+ v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl);
+ v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl);
+ v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl);
+
+ v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl);
+ v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl);
+
+ __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl);
+
+ src_width -= vl;
+ work_src_ptr += vl;
+ work_dst_ptr += 2 * vl;
+ }
+ dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];
+}
+#endif
+
+#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
+void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ size_t work_width = ((size_t)dst_width - 1u) & ~1u;
+ size_t src_width = work_width >> 1u;
+ const uint8_t* work_s = src_ptr;
+ const uint8_t* work_t = src_ptr + src_stride;
+ const uint8_t* s = work_s;
+ const uint8_t* t = work_t;
+ uint8_t* d = dst_ptr;
+ uint8_t* e = dst_ptr + dst_stride;
+ uint8_t* work_d = d + 1;
+ uint8_t* work_e = e + 1;
+ size_t vl = __riscv_vsetvlmax_e16m4();
+ vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
+ vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
+ d[0] = (3 * s[0] + t[0] + 2) >> 2;
+ e[0] = (s[0] + 3 * t[0] + 2) >> 2;
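+  // Interior pixels use the usual 9:3:3:1 bilinear kernel, e.g. for row d:
+  //   d[2*i + 1] = (9 * s[i] + 3 * s[i + 1] + 3 * t[i] + t[i + 1] + 8) >> 4
+  // built from two 3:1 passes: vwmaccu horizontally, then vmacc vertically,
+  // then a narrowing shift right by 4.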
+ while (src_width > 0) {
+ vuint8m2_t v_s0, v_s1, v_t0, v_t1;
+ vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
+ vuint16m4_t v_t0_u16_, v_t1_u16_;
+ vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
+ size_t vl = __riscv_vsetvl_e8m2(src_width);
+ v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
+ v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl);
+
+ v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
+ v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
+ v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
+ v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
+
+ v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
+ v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl);
+
+ v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
+ v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
+ v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
+ v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
+
+ v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
+ v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
+
+ v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
+ v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
+ v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
+ v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
+
+ v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
+ v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
+ v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
+ v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
+
+ __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl);
+ __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl);
+
+ src_width -= vl;
+ work_s += vl;
+ work_t += vl;
+ work_d += 2 * vl;
+ work_e += 2 * vl;
+ }
+ d[dst_width - 1] =
+ (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2;
+ e[dst_width - 1] =
+ (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2;
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2_RVV
+void ScaleUVRowDown2_RVV(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint32_t* src = (const uint32_t*)src_uv;
+ uint16_t* dst = (uint16_t*)dst_uv;
+ (void)src_stride;
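+  // Each 32-bit lane holds two UV pixels {u0, v0, u1, v1}; shifting right
+  // by 16 and narrowing keeps {u1, v1}, the odd source pixel of each pair
+  // on a little-endian target, matching the scalar down-by-2 kernels.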
+ do {
+ size_t vl = __riscv_vsetvl_e32m8(w);
+ vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl);
+ vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl);
+ __riscv_vse16_v_u16m4(dst, v_u1v1, vl);
+ w -= vl;
+ src += vl;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV
+void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const uint16_t* src = (const uint16_t*)src_uv;
+ (void)src_stride;
+  // NOTE: To match behavior on other platforms, vxrm (the fixed-point
+  // rounding-mode register) is set to round-to-nearest-up mode (0).
+ asm volatile("csrwi vxrm, 0");
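+  // With vxrm = 0 (round-to-nearest-up), vaaddu computes (a + b + 1) >> 1,
+  // e.g. (250 + 253 + 1) >> 1 = 252, the rounded average.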
+ do {
+ vuint8m4_t v_u0v0, v_u1v1, v_avg;
+ vuint16m4_t v_u0v0_16, v_u1v1_16;
+ size_t vl = __riscv_vsetvl_e16m4(w);
+ __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl);
+ v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16);
+ v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16);
+ // Use round-to-nearest-up mode for averaging add
+ v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2);
+ __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2);
+ w -= vl;
+ src += vl * 2;
+ dst_uv += vl * 2;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN2BOX_RVV
+void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ uint8_t* dst_uv,
+ int dst_width) {
+ const uint8_t* src_uv_row1 = src_uv + src_stride;
+ size_t w = (size_t)dst_width;
+  // NOTE: To match behavior on other platforms, vxrm (the fixed-point
+  // rounding-mode register) is set to round-to-nearest-up mode (0).
+ asm volatile("csrwi vxrm, 0");
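+  // With vxrm = 0 (round-to-nearest-up), vnclipu by 2 computes
+  // (sum + 2) >> 2, e.g. a 2x2 box {1, 2, 3, 4} sums to 10 and narrows to
+  // (10 + 2) >> 2 = 3.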
+ do {
+ vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0;
+ vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1;
+ vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1;
+ vuint16m4_t v_sum0, v_sum1;
+ vuint8m2_t v_dst_u, v_dst_v;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+
+ __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0,
+ src_uv, vl);
+ __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1,
+ src_uv_row1, vl);
+
+ v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl);
+ v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl);
+ v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl);
+ v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl);
+
+ v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl);
+ v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl);
+ // Use round-to-nearest-up mode for vnclip
+ v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl);
+ v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl);
+
+ __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl);
+
+ dst_uv += 2 * vl;
+ src_uv += 4 * vl;
+ w -= vl;
+ src_uv_row1 += 4 * vl;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWN4_RVV
+void ScaleUVRowDown4_RVV(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_uv,
+ int dst_width) {
+  // Overflow cannot happen here: dst_width = src_width / 4 where src_width
+  // is an int, and sizeof(size_t) is twice sizeof(int) on this target, so
+  // dst_width * 8 still fits in a size_t.
+ size_t w = (size_t)dst_width * 8;
+ (void)src_stride;
+ (void)src_stepx;
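+  // src_stepx is unused: this kernel is only selected when the step is 4
+  // (see the dispatch in ScaleUVDownEven). Each u64 lane then holds four UV
+  // pixels; truncating it twice (u64 -> u32 -> u16) keeps the low two
+  // bytes, i.e. the first UV pair of every four on a little-endian target.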
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(w);
+ vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl);
+ vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row);
+ // Narrowing without clipping
+ vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8);
+ vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8);
+ vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16);
+ __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4);
+ w -= vl;
+ src_uv += vl;
+ dst_uv += vl / 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_SCALEUVROWDOWNEVEN_RVV
+void ScaleUVRowDownEven_RVV(const uint8_t* src_uv,
+ ptrdiff_t src_stride,
+ int src_stepx,
+ uint8_t* dst_uv,
+ int dst_width) {
+ size_t w = (size_t)dst_width;
+ const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2;
+ const uint16_t* src = (const uint16_t*)(src_uv);
+ uint16_t* dst = (uint16_t*)(dst_uv);
+ (void)src_stride;
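+  // vlse16 gathers one 16-bit UV pair every src_stepx pixels; the stride is
+  // src_stepx * 2 bytes since each UV pair occupies two bytes.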
+ do {
+ size_t vl = __riscv_vsetvl_e16m8(w);
+ vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl);
+ __riscv_vse16_v_u16m8(dst, v_row, vl);
+ w -= vl;
+ src += vl * src_stepx;
+ dst += vl;
+ } while (w > 0);
+}
+#endif
+
+// ScaleUVRowUp2_(Bi)linear_RVV functions are equal to other platforms'
+// ScaleUVRowUp2_(Bi)linear_Any_XXX. We process the entire row in these
+// functions; other platforms only implement the non-edge part of the image
+// and process the edges with scalar code.
+
+#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
+void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int dst_width) {
+ size_t work_width = ((size_t)dst_width - 1u) & ~1u;
+ uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1;
+ const uint8_t* work_src_ptr = src_ptr;
+ size_t vl = __riscv_vsetvlmax_e8m4();
+ vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl);
+ dst_ptr[0] = src_ptr[0];
+ dst_ptr[1] = src_ptr[1];
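+  // Same 3:1 linear kernel as the planar version above, applied per
+  // channel: the horizontal neighbor is the UV pair 2 bytes away, and
+  // outputs are stored as 16-bit units so U and V stay interleaved.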
+ while (work_width > 0) {
+ vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8;
+ vuint16m4_t v_dst_odd, v_dst_even;
+ vuint16m8_t v_uv0_u16, v_uv1_u16;
+ size_t vl = __riscv_vsetvl_e8m4(work_width);
+ v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
+ v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl);
+
+ v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl);
+ v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl);
+
+ v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl);
+ v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl);
+
+ v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl);
+ v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl);
+
+ v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8);
+ v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8);
+
+ __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2);
+
+ work_width -= vl;
+ work_src_ptr += vl;
+ work_dst_ptr += vl;
+ }
+ dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2];
+ dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1];
+}
+#endif
+
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
+void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst_ptr,
+ ptrdiff_t dst_stride,
+ int dst_width) {
+ size_t work_width = ((size_t)dst_width - 1u) & ~1u;
+ const uint8_t* work_s = src_ptr;
+ const uint8_t* work_t = src_ptr + src_stride;
+ const uint8_t* s = work_s;
+ const uint8_t* t = work_t;
+ uint8_t* d = dst_ptr;
+ uint8_t* e = dst_ptr + dst_stride;
+ uint16_t* work_d = (uint16_t*)d + 1;
+ uint16_t* work_e = (uint16_t*)e + 1;
+ size_t vl = __riscv_vsetvlmax_e16m4();
+ vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
+ vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
+ d[0] = (3 * s[0] + t[0] + 2) >> 2;
+ e[0] = (s[0] + 3 * t[0] + 2) >> 2;
+ d[1] = (3 * s[1] + t[1] + 2) >> 2;
+ e[1] = (s[1] + 3 * t[1] + 2) >> 2;
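+  // Same 9:3:3:1 bilinear kernel as the planar version above, applied per
+  // channel; the horizontal neighbor is the UV pair 2 bytes away, and
+  // 16-bit stores keep U and V interleaved.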
+ while (work_width > 0) {
+ vuint8m2_t v_s0, v_s1, v_t0, v_t1;
+ vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
+ vuint16m4_t v_t0_u16_, v_t1_u16_;
+ vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8;
+ vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
+ size_t vl = __riscv_vsetvl_e8m2(work_width);
+ v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
+ v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl);
+
+ v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
+ v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
+ v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
+ v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
+
+ v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
+ v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl);
+
+ v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
+ v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
+ v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
+ v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
+
+ v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
+ v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
+
+ v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
+ v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
+ v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
+ v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
+
+ v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
+ v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
+ v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
+ v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
+
+ v_dst0_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8);
+ v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8);
+ v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8);
+ v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8);
+
+ __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2);
+ __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2);
+
+ work_width -= vl;
+ work_s += vl;
+ work_t += vl;
+ work_d += vl;
+ work_e += vl;
+ }
+ d[2 * dst_width - 2] =
+ (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >>
+ 2;
+ e[2 * dst_width - 2] =
+ (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >>
+ 2;
+ d[2 * dst_width - 1] =
+ (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >>
+ 2;
+ e[2 * dst_width - 1] =
+ (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >>
+ 2;
+}
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
+ // defined(__clang__)
diff --git a/files/source/scale_uv.cc b/source/scale_uv.cc
index 3b3d7b8e..0931c89a 100644
--- a/files/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
- src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@@ -112,6 +112,31 @@ static void ScaleUVDown2(int src_width,
}
}
#endif
+#if defined(HAS_SCALEUVROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleUVRowDown2 =
+ filtering == kFilterNone
+ ? ScaleUVRowDown2_Any_NEON
+ : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
+ : ScaleUVRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleUVRowDown2 =
+ filtering == kFilterNone
+ ? ScaleUVRowDown2_NEON
+ : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
+ : ScaleUVRowDown2Box_NEON);
+ }
+ }
+#endif
+#if defined(HAS_SCALEUVROWDOWN2_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleUVRowDown2 =
+ filtering == kFilterNone
+ ? ScaleUVRowDown2_RVV
+ : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV
+ : ScaleUVRowDown2Box_RVV);
+ }
+#endif
// This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
@@ -130,23 +155,7 @@ static void ScaleUVDown2(int src_width,
}
}
#endif
-// This code is not enabled. Only box filter is available at this time.
-#if defined(HAS_SCALEUVROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleUVRowDown2 =
- filtering == kFilterNone
- ? ScaleUVRowDown2_Any_NEON
- : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
- : ScaleUVRowDown2Box_Any_NEON);
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleUVRowDown2 =
- filtering == kFilterNone
- ? ScaleUVRowDown2_NEON
- : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
- : ScaleUVRowDown2Box_NEON);
- }
- }
-#endif
+
#if defined(HAS_SCALEUVROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleUVRowDown2 =
@@ -179,28 +188,30 @@ static void ScaleUVDown2(int src_width,
// This is an optimized version for scaling down a UV to 1/4 of
// its original size.
#if HAS_SCALEUVDOWN4BOX
-static void ScaleUVDown4Box(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_uv,
- uint8_t* dst_uv,
- int x,
- int dx,
- int y,
- int dy) {
+static int ScaleUVDown4Box(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_uv,
+ uint8_t* dst_uv,
+ int x,
+ int dx,
+ int y,
+ int dy) {
int j;
// Allocate 2 rows of UV.
- const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 * 2 + 15) & ~15;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@@ -231,16 +242,22 @@ static void ScaleUVDown4Box(int src_width,
}
}
#endif
+#if defined(HAS_SCALEUVROWDOWN2BOX_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV;
+ }
+#endif
for (j = 0; j < dst_height; ++j) {
ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
- ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
+ ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
- ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
+ ScaleUVRowDown2(row, row_size, dst_uv, dst_width);
src_uv += row_stride;
dst_uv += dst_stride;
}
free_aligned_buffer_64(row);
+ return 0;
}
#endif // HAS_SCALEUVDOWN4BOX
@@ -263,7 +280,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
- int row_stride = (dy >> 16) * (int64_t)src_stride;
+ ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@@ -271,7 +288,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@@ -310,6 +327,12 @@ static void ScaleUVDownEven(int src_width,
}
}
#endif
+#if defined(HAS_SCALEUVROWDOWNEVEN_RVV)
+ if (TestCpuFlag(kCpuHasRVV) && !filtering) {
+ ScaleUVRowDownEven =
+ (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV;
+ }
+#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -324,24 +347,24 @@ static void ScaleUVDownEven(int src_width,
// Scale UV down with bilinear interpolation.
#if HAS_SCALEUVBILINEARDOWN
-static void ScaleUVBilinearDown(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_uv,
- uint8_t* dst_uv,
- int x,
- int dx,
- int y,
- int dy,
- enum FilterMode filtering) {
+static int ScaleUVBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_uv,
+ uint8_t* dst_uv,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@@ -397,6 +420,11 @@ static void ScaleUVBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
@@ -421,15 +449,16 @@ static void ScaleUVBilinearDown(int src_width,
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of UV.
{
- align_buffer_64(row, clip_src_width * 2);
-
const int max_y = (src_height - 1) << 16;
+ align_buffer_64(row, clip_src_width * 2);
+ if (!row)
+ return 1;
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+ const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@@ -445,29 +474,30 @@ static void ScaleUVBilinearDown(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
#endif
// Scale UV up with bilinear interpolation.
#if HAS_SCALEUVBILINEARUP
-static void ScaleUVBilinearUp(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_uv,
- uint8_t* dst_uv,
- int x,
- int dx,
- int y,
- int dy,
- enum FilterMode filtering) {
+static int ScaleUVBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_uv,
+ uint8_t* dst_uv,
+ int x,
+ int dx,
+ int y,
+ int dy,
+ enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
@@ -511,6 +541,11 @@ static void ScaleUVBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (src_width >= 32768) {
ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
}
@@ -571,14 +606,16 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
- const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+ const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
- const int kRowSize = (dst_width * 2 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 + 15) & ~15;
+ align_buffer_64(row, row_size * 2);
+ if (!row)
+ return 1;
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -596,7 +633,7 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
- src = src_uv + yi * (int64_t)src_stride;
+ src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -619,6 +656,7 @@ static void ScaleUVBilinearUp(int src_width,
}
free_aligned_buffer_64(row);
}
+ return 0;
}
#endif // HAS_SCALEUVBILINEARUP
@@ -627,14 +665,14 @@ static void ScaleUVBilinearUp(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of NV16 to NV24.
-void ScaleUVLinearUp2(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_uv,
- uint8_t* dst_uv) {
+static void ScaleUVLinearUp2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_uv,
+ uint8_t* dst_uv) {
void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
ScaleUVRowUp2_Linear_Any_C;
int i;
@@ -644,32 +682,38 @@ void ScaleUVLinearUp2(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_SSSE3;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_NEON;
}
#endif
+#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ScaleRowUp = ScaleUVRowUp2_Linear_RVV;
+ }
+#endif
+
if (dst_height == 1) {
- ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+ ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
- ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+ ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@@ -680,14 +724,14 @@ void ScaleUVLinearUp2(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of NV12 to NV24.
-void ScaleUVBilinearUp2(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+static void ScaleUVBilinearUp2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleUVRowUp2_Bilinear_Any_C;
@@ -697,24 +741,30 @@ void ScaleUVBilinearUp2(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_SSSE3;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_NEON;
}
#endif
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
+ if (TestCpuFlag(kCpuHasRVV)) {
+ Scale2RowUp = ScaleUVRowUp2_Bilinear_RVV;
+ }
+#endif
+
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
@@ -734,14 +784,14 @@ void ScaleUVBilinearUp2(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of P210 to P410.
-void ScaleUVLinearUp2_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_uv,
- uint16_t* dst_uv) {
+static void ScaleUVLinearUp2_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_uv,
+ uint16_t* dst_uv) {
void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
ScaleUVRowUp2_Linear_16_Any_C;
int i;
@@ -751,32 +801,32 @@ void ScaleUVLinearUp2_16(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
if (dst_height == 1) {
- ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+ ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
- ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+ ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@@ -787,14 +837,14 @@ void ScaleUVLinearUp2_16(int src_width,
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of P010 to P410.
-void ScaleUVBilinearUp2_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+static void ScaleUVBilinearUp2_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleUVRowUp2_Bilinear_16_Any_C;
@@ -804,19 +854,19 @@ void ScaleUVBilinearUp2_16(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
@@ -854,7 +904,7 @@ static void ScaleUVSimple(int src_width,
int y,
int dy) {
int j;
- void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
+ void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
int x, int dx) =
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
(void)src_height;
@@ -889,7 +939,7 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
- ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
+ ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
@@ -910,7 +960,7 @@ static int UVCopy(const uint8_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
- src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@@ -930,7 +980,7 @@ static int UVCopy_16(const uint16_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
- src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@@ -942,19 +992,19 @@ static int UVCopy_16(const uint16_t* src_uv,
// Scale a UV plane (from NV12)
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
-static void ScaleUV(const uint8_t* src,
- int src_stride,
- int src_width,
- int src_height,
- uint8_t* dst,
- int dst_stride,
- int dst_width,
- int dst_height,
- int clip_x,
- int clip_y,
- int clip_width,
- int clip_height,
- enum FilterMode filtering) {
+static int ScaleUV(const uint8_t* src,
+ int src_stride,
+ int src_width,
+ int src_height,
+ uint8_t* dst,
+ int dst_stride,
+ int dst_width,
+ int dst_height,
+ int clip_x,
+ int clip_y,
+ int clip_width,
+ int clip_height,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -968,7 +1018,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src = src + (src_height - 1) * (int64_t)src_stride;
+ src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -983,7 +1033,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
- src += (clipf >> 16) * (int64_t)src_stride;
+ src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@@ -1000,22 +1050,22 @@ static void ScaleUV(const uint8_t* src,
ScaleUVDown2(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, x, dx, y, dy,
filtering);
- return;
+ return 0;
}
#endif
#if HAS_SCALEUVDOWN4BOX
if (dx == 0x40000 && filtering == kFilterBox) {
// Optimized 1/4 box downsample.
- ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy);
- return;
+ return ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y,
+ dy);
}
#endif
#if HAS_SCALEUVDOWNEVEN
ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, x, dx, y, dy,
filtering);
- return;
+ return 0;
#endif
}
// Optimized odd scale down. ie 3, 5, 7, 9x.
@@ -1024,9 +1074,9 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
- UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
+ UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
- return;
+ return 0;
}
#endif
}
@@ -1037,38 +1087,37 @@ static void ScaleUV(const uint8_t* src,
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering);
- return;
+ return 0;
}
- if (filtering && (dst_width + 1) / 2 == src_width) {
+ if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst);
- return;
+ return 0;
}
if ((clip_height + 1) / 2 == src_height &&
(clip_width + 1) / 2 == src_width &&
(filtering == kFilterBilinear || filtering == kFilterBox)) {
ScaleUVBilinearUp2(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst);
- return;
+ return 0;
}
#if HAS_SCALEUVBILINEARUP
if (filtering && dy < 65536) {
- ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy,
- filtering);
- return;
+ return ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
}
#endif
#if HAS_SCALEUVBILINEARDOWN
if (filtering) {
- ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst, x, dx, y, dy,
- filtering);
- return;
+ return ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
+ src_stride, dst_stride, src, dst, x, dx, y, dy,
+ filtering);
}
#endif
ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst, x, dx, y, dy);
+ return 0;
}
// Scale an UV image.
@@ -1086,9 +1135,9 @@ int UVScale(const uint8_t* src_uv,
src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
return -1;
}
- ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
- dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
- return 0;
+ return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv,
+ dst_stride_uv, dst_width, dst_height, 0, 0, dst_width,
+ dst_height, filtering);
}
// Scale a 16 bit UV image.
@@ -1118,7 +1167,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@@ -1126,20 +1175,20 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
- UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
+ UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
- UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
- dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
- dst_height);
+ UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
+ (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
+ dst_width, dst_height);
}
return 0;
}
#endif
- if (filtering && (dst_width + 1) / 2 == src_width) {
+ if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
ScaleUVLinearUp2_16(src_width, src_height, dst_width, dst_height,
src_stride_uv, dst_stride_uv, src_uv, dst_uv);
return 0;
diff --git a/files/source/scale_win.cc b/source/scale_win.cc
index ea1f95c6..ea1f95c6 100644
--- a/files/source/scale_win.cc
+++ b/source/scale_win.cc
diff --git a/files/source/test.sh b/source/test.sh
index 7f12c3c1..7f12c3c1 100755
--- a/files/source/test.sh
+++ b/source/test.sh
diff --git a/files/source/video_common.cc b/source/video_common.cc
index 92384c05..92384c05 100644
--- a/files/source/video_common.cc
+++ b/source/video_common.cc
diff --git a/tools_libyuv/OWNERS b/tools_libyuv/OWNERS
new file mode 100644
index 00000000..aae4fb6e
--- /dev/null
+++ b/tools_libyuv/OWNERS
@@ -0,0 +1,4 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
+
diff --git a/tools_libyuv/autoroller/roll_deps.py b/tools_libyuv/autoroller/roll_deps.py
new file mode 100755
index 00000000..d5c1089f
--- /dev/null
+++ b/tools_libyuv/autoroller/roll_deps.py
@@ -0,0 +1,822 @@
+#!/usr/bin/env vpython3
+
+# Copyright (c) 2017 The LibYUV project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Script to automatically roll dependencies in the LibYUV DEPS file."""
+
+
+import argparse
+import base64
+import collections
+import logging
+import os
+import re
+import subprocess
+import sys
+import urllib.request
+
+
+def FindSrcDirPath():
+ """Returns the abs path to the src/ dir of the project."""
+ src_dir = os.path.dirname(os.path.abspath(__file__))
+ while os.path.basename(src_dir) != 'src':
+ src_dir = os.path.normpath(os.path.join(src_dir, os.pardir))
+ return src_dir
+
+
+# Skip these dependencies (list without solution name prefix).
+DONT_AUTOROLL_THESE = [
+ 'third_party/fuchsia-gn-sdk',
+ 'src/third_party/gflags/src',
+ 'src/third_party/mockito/src',
+]
+
+# These dependencies are missing in chromium/src/DEPS, either unused or already
+# in-tree. For instance, src/base is a part of the Chromium source git repo,
+# but we pull it in through a subtree mirror, so it isn't listed in
+# Chromium's deps even though it is in ours.
+LIBYUV_ONLY_DEPS = [
+ 'src/base',
+ 'src/build',
+ 'src/buildtools',
+ 'src/ios',
+ 'src/testing',
+ 'src/third_party',
+ 'src/third_party/android_support_test_runner',
+ 'src/third_party/bazel',
+ 'src/third_party/bouncycastle',
+ 'src/third_party/errorprone/lib',
+ 'src/third_party/findbugs',
+ 'src/third_party/gson',
+ 'src/third_party/gtest-parallel',
+ 'src/third_party/guava',
+ 'src/third_party/intellij',
+ 'src/third_party/jsr-305/src',
+ 'src/third_party/ow2_asm',
+ 'src/third_party/proguard',
+ 'src/third_party/ub-uiautomator/lib',
+ 'src/tools',
+ 'src/tools/clang/dsymutil',
+]
+
+LIBYUV_URL = 'https://chromium.googlesource.com/libyuv/libyuv'
+CHROMIUM_SRC_URL = 'https://chromium.googlesource.com/chromium/src'
+CHROMIUM_COMMIT_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s'
+CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s'
+CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s'
+
+COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$')
+CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([-0-9a-z]+)\'$')
+ROLL_BRANCH_NAME = 'roll_chromium_revision'
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+CHECKOUT_SRC_DIR = FindSrcDirPath()
+CHECKOUT_ROOT_DIR = os.path.realpath(os.path.join(CHECKOUT_SRC_DIR, os.pardir))
+
+# Copied from tools/android/roll/android_deps/.../BuildConfigGenerator.groovy.
+ANDROID_DEPS_START = r'=== ANDROID_DEPS Generated Code Start ==='
+ANDROID_DEPS_END = r'=== ANDROID_DEPS Generated Code End ==='
+# Location of automatically gathered android deps.
+ANDROID_DEPS_PATH = 'src/third_party/android_deps/'
+
+sys.path.append(os.path.join(CHECKOUT_SRC_DIR, 'build'))
+import find_depot_tools
+
+find_depot_tools.add_depot_tools_to_path()
+
+CLANG_UPDATE_SCRIPT_URL_PATH = 'tools/clang/scripts/update.py'
+CLANG_UPDATE_SCRIPT_LOCAL_PATH = os.path.join(CHECKOUT_SRC_DIR, 'tools',
+ 'clang', 'scripts', 'update.py')
+
+DepsEntry = collections.namedtuple('DepsEntry', 'path url revision')
+ChangedDep = collections.namedtuple('ChangedDep',
+ 'path url current_rev new_rev')
+CipdDepsEntry = collections.namedtuple('CipdDepsEntry', 'path packages')
+VersionEntry = collections.namedtuple('VersionEntry', 'version')
+ChangedCipdPackage = collections.namedtuple(
+ 'ChangedCipdPackage', 'path package current_version new_version')
+ChangedVersionEntry = collections.namedtuple(
+ 'ChangedVersionEntry', 'path current_version new_version')
+
+ChromiumRevisionUpdate = collections.namedtuple('ChromiumRevisionUpdate',
+ ('current_chromium_rev '
+ 'new_chromium_rev '))
+
+
+class RollError(Exception):
+ pass
+
+
+def StrExpansion():
+ return lambda str_value: str_value
+
+
+def VarLookup(local_scope):
+ return lambda var_name: local_scope['vars'][var_name]
+
+
+def ParseDepsDict(deps_content):
+ local_scope = {}
+ global_scope = {
+ 'Str': StrExpansion(),
+ 'Var': VarLookup(local_scope),
+ 'deps_os': {},
+ }
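+  # DEPS files are Python; a minimal (hypothetical) example such as
+  #   vars = {'chromium_revision': 'deadbeef'}
+  #   deps = {'src/build': 'https://.../build.git@' + Var('chromium_revision')}
+  # leaves the evaluated 'vars' and 'deps' dicts in local_scope, with Var
+  # resolved against local_scope['vars'].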
+ exec(deps_content, global_scope, local_scope)
+ return local_scope
+
+
+def ParseLocalDepsFile(filename):
+ with open(filename, 'rb') as f:
+ deps_content = f.read().decode('utf-8')
+ return ParseDepsDict(deps_content)
+
+
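+# Parses the Cr-Commit-Position footer, e.g.
+# 'Cr-Commit-Position: refs/heads/main@{#123456}' yields 123456.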
+def ParseCommitPosition(commit_message):
+ for line in reversed(commit_message.splitlines()):
+ m = COMMIT_POSITION_RE.match(line.strip())
+ if m:
+ return int(m.group(1))
+ logging.error('Failed to parse commit position id from:\n%s\n',
+ commit_message)
+ sys.exit(-1)
+
+
+def _RunCommand(command,
+ working_dir=None,
+ ignore_exit_code=False,
+ extra_env=None,
+ input_data=None):
+ """Runs a command and returns the output from that command.
+
+ If the command fails (exit code != 0), the function will exit the process.
+
+ Returns:
+ A tuple containing the stdout and stderr outputs as strings.
+ """
+ working_dir = working_dir or CHECKOUT_SRC_DIR
+ logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir)
+ env = os.environ.copy()
+ if extra_env:
+ assert all(isinstance(value, str) for value in extra_env.values())
+ logging.debug('extra env: %s', extra_env)
+ env.update(extra_env)
+ p = subprocess.Popen(command,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=env,
+ cwd=working_dir,
+ universal_newlines=True)
+ std_output, err_output = p.communicate(input_data)
+ p.stdout.close()
+ p.stderr.close()
+ if not ignore_exit_code and p.returncode != 0:
+ logging.error('Command failed: %s\n'
+ 'stdout:\n%s\n'
+ 'stderr:\n%s\n', ' '.join(command), std_output, err_output)
+ sys.exit(p.returncode)
+ return std_output, err_output
+
+
+def _GetBranches():
+ """Returns a tuple of active,branches.
+
+ The 'active' is the name of the currently active branch and 'branches' is a
+ list of all branches.
+ """
+ lines = _RunCommand(['git', 'branch'])[0].split('\n')
+ branches = []
+ active = ''
+ for line in lines:
+ if '*' in line:
+ # The assumption is that the first char will always be the '*'.
+ active = line[1:].strip()
+ branches.append(active)
+ else:
+ branch = line.strip()
+ if branch:
+ branches.append(branch)
+ return active, branches
+
+
+def _ReadGitilesContent(url):
+ # Download and decode BASE64 content until
+ # https://code.google.com/p/gitiles/issues/detail?id=7 is fixed.
+ base64_content = ReadUrlContent(url + '?format=TEXT')
+ return base64.b64decode(base64_content[0]).decode('utf-8')
+
+
+def ReadRemoteCrFile(path_below_src, revision):
+ """Reads a remote Chromium file of a specific revision.
+
+ Args:
+ path_below_src: A path to the target file relative to src dir.
+ revision: Revision to read.
+ Returns:
+ A string with file content.
+ """
+ return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE %
+ (revision, path_below_src))
+
+
+def ReadRemoteCrCommit(revision):
+ """Reads a remote Chromium commit message. Returns a string."""
+ return _ReadGitilesContent(CHROMIUM_COMMIT_TEMPLATE % revision)
+
+
+def ReadUrlContent(url):
+ """Connect to a remote host and read the contents.
+
+ Args:
+ url: URL to connect to.
+ Returns:
+ A list of lines.
+ """
+ conn = urllib.request.urlopen(url)
+ try:
+ return conn.readlines()
+ except IOError as e:
+ logging.exception('Error connecting to %s. Error: %s', url, e)
+ raise
+ finally:
+ conn.close()
+
+
+def GetMatchingDepsEntries(depsentry_dict, dir_path):
+ """Gets all deps entries matching the provided path.
+
+ This list may contain more than one DepsEntry object.
+ Example: dir_path='src/testing' would give results containing both
+ 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's
+ DEPS.
+ Example 2: dir_path='src/build' should return 'src/build' but not
+ 'src/buildtools'.
+
+ Returns:
+ A list of DepsEntry objects.
+ """
+ result = []
+ for path, depsentry in depsentry_dict.items():
+ if path == dir_path:
+ result.append(depsentry)
+ else:
+ parts = path.split('/')
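+      # Treat dir_path as a component-wise prefix of path, so 'src/build'
+      # matches 'src/build/foo' but not 'src/buildtools'.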
+ if all(part == parts[i] for i, part in enumerate(dir_path.split('/'))):
+ result.append(depsentry)
+ return result
+
+
+def BuildDepsentryDict(deps_dict):
+ """Builds a dict of paths to DepsEntry objects from a raw deps dict."""
+ result = {}
+
+ def AddDepsEntries(deps_subdict):
+ for path, dep in deps_subdict.items():
+ if path in result:
+ continue
+ if not isinstance(dep, dict):
+ dep = {'url': dep}
+ if dep.get('dep_type') == 'cipd':
+ result[path] = CipdDepsEntry(path, dep['packages'])
+ else:
+ if '@' not in dep['url']:
+ continue
+ url, revision = dep['url'].split('@')
+ result[path] = DepsEntry(path, url, revision)
+
+ def AddVersionEntry(vars_subdict):
+ for key, value in vars_subdict.items():
+ if key in result:
+ continue
+ if not key.endswith('_version'):
+ continue
+ key = re.sub('_version$', '', key)
+ result[key] = VersionEntry(value)
+
+ AddDepsEntries(deps_dict['deps'])
+  for deps_os in ['win', 'mac', 'unix', 'android', 'ios']:
+ AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {}))
+ AddVersionEntry(deps_dict.get('vars', {}))
+ return result
+
+
+def _FindChangedCipdPackages(path, old_pkgs, new_pkgs):
+ old_pkgs_names = {p['package'] for p in old_pkgs}
+ new_pkgs_names = {p['package'] for p in new_pkgs}
+ pkgs_equal = (old_pkgs_names == new_pkgs_names)
+ added_pkgs = [p for p in new_pkgs_names if p not in old_pkgs_names]
+ removed_pkgs = [p for p in old_pkgs_names if p not in new_pkgs_names]
+
+ assert pkgs_equal, ('Old: %s\n New: %s.\nYou need to do a manual roll '
+ 'and remove/add entries in DEPS so the old and new '
+ 'list match.\nMost likely, you should add \"%s\" and '
+ 'remove \"%s\"' %
+ (old_pkgs, new_pkgs, added_pkgs, removed_pkgs))
+
+ for old_pkg in old_pkgs:
+ for new_pkg in new_pkgs:
+ old_version = old_pkg['version']
+ new_version = new_pkg['version']
+ if (old_pkg['package'] == new_pkg['package']
+ and old_version != new_version):
+ logging.debug('Roll dependency %s to %s', path, new_version)
+ yield ChangedCipdPackage(path, old_pkg['package'], old_version,
+ new_version)
+
+
+def _FindChangedVars(name, old_version, new_version):
+ if old_version != new_version:
+ logging.debug('Roll dependency %s to %s', name, new_version)
+ yield ChangedVersionEntry(name, old_version, new_version)
+
+
+def _FindNewDeps(old, new):
+ """ Gather dependencies only in `new` and return corresponding paths. """
+ old_entries = set(BuildDepsentryDict(old))
+ new_entries = set(BuildDepsentryDict(new))
+ return [
+ path for path in new_entries - old_entries
+ if path not in DONT_AUTOROLL_THESE
+ ]
+
+
+def FindAddedDeps(libyuv_deps, new_cr_deps):
+ """
+ Calculate new deps entries of interest.
+
+ Ideally, that would mean: only appearing in chromium DEPS
+ but transitively used in LibYUV.
+
+ Since it's hard to compute, we restrict ourselves to a well defined subset:
+ deps sitting in `ANDROID_DEPS_PATH`.
+  Anything else is assumed to be a Chromium-only dependency.
+
+ Args:
+ libyuv_deps: dict of deps as defined in the LibYUV DEPS file.
+ new_cr_deps: dict of deps as defined in the chromium DEPS file.
+
+ Caveat: Doesn't detect a new package in existing dep.
+
+ Returns:
+ A tuple consisting of:
+      A list of paths of added dependencies sitting in `ANDROID_DEPS_PATH`.
+ A list of paths for other added dependencies.
+ """
+ all_added_deps = _FindNewDeps(libyuv_deps, new_cr_deps)
+ generated_android_deps = [
+ path for path in all_added_deps if path.startswith(ANDROID_DEPS_PATH)
+ ]
+ other_deps = [
+ path for path in all_added_deps if path not in generated_android_deps
+ ]
+ return generated_android_deps, other_deps
+
+
+def FindRemovedDeps(libyuv_deps, new_cr_deps):
+ """
+ Calculate obsolete deps entries.
+
+  Ideally, that would mean: no longer appearing in chromium DEPS
+ and not used in LibYUV.
+
+ Since it's hard to compute:
+ 1/ We restrict ourselves to a well defined subset:
+ deps sitting in `ANDROID_DEPS_PATH`.
+  2/ We rely on the existing behavior of CalculateChangedDeps.
+ I.e. Assumes non-CIPD dependencies are LibYUV-only, don't remove them.
+
+ Args:
+ libyuv_deps: dict of deps as defined in the LibYUV DEPS file.
+ new_cr_deps: dict of deps as defined in the chromium DEPS file.
+
+ Caveat: Doesn't detect a deleted package in existing dep.
+
+ Returns:
+ A tuple consisting of:
+ A list of paths of dependencies removed from `ANDROID_DEPS_PATH`.
+ A list of paths of unexpected disappearing dependencies.
+ """
+ all_removed_deps = _FindNewDeps(new_cr_deps, libyuv_deps)
+ generated_android_deps = sorted(
+ [path for path in all_removed_deps if path.startswith(ANDROID_DEPS_PATH)])
+  # LibYUV-only dependencies are handled in CalculateChangedDeps.
+ other_deps = sorted([
+ path for path in all_removed_deps
+ if path not in generated_android_deps and path not in LIBYUV_ONLY_DEPS
+ ])
+ return generated_android_deps, other_deps
+
+
+def CalculateChangedDeps(libyuv_deps, new_cr_deps):
+ """
+ Calculate changed deps entries based on entries defined in the LibYUV DEPS
+ file:
+  - If it's a dependency shared with the Chromium DEPS file: roll it to the
+    same revision as Chromium (i.e. the entry in the new_cr_deps dict)
+ - If it's a Chromium sub-directory, roll it to the HEAD revision (notice
+ this means it may be ahead of the chromium_revision, but generally these
+ should be close).
+ - If it's another DEPS entry (not shared with Chromium), roll it to HEAD
+ unless it's configured to be skipped.
+
+ Returns:
+ A list of ChangedDep objects representing the changed deps.
+ """
+ result = []
+ libyuv_entries = BuildDepsentryDict(libyuv_deps)
+ new_cr_entries = BuildDepsentryDict(new_cr_deps)
+ for path, libyuv_deps_entry in libyuv_entries.items():
+ if path in DONT_AUTOROLL_THESE:
+ continue
+ cr_deps_entry = new_cr_entries.get(path)
+ if cr_deps_entry:
+ assert type(cr_deps_entry) is type(libyuv_deps_entry)
+
+ if isinstance(cr_deps_entry, CipdDepsEntry):
+ result.extend(
+ _FindChangedCipdPackages(path, libyuv_deps_entry.packages,
+ cr_deps_entry.packages))
+ continue
+
+ if isinstance(cr_deps_entry, VersionEntry):
+ result.extend(
+ _FindChangedVars(path, libyuv_deps_entry.version,
+ cr_deps_entry.version))
+ continue
+
+ # Use the revision from Chromium's DEPS file.
+ new_rev = cr_deps_entry.revision
+ assert libyuv_deps_entry.url == cr_deps_entry.url, (
+ 'LibYUV DEPS entry %s has a different URL %s than Chromium %s.' %
+ (path, libyuv_deps_entry.url, cr_deps_entry.url))
+ else:
+ if isinstance(libyuv_deps_entry, DepsEntry):
+ # Use the HEAD of the deps repo.
+ stdout, _ = _RunCommand(
+ ['git', 'ls-remote', libyuv_deps_entry.url, 'HEAD'])
+ new_rev = stdout.strip().split('\t')[0]
+ else:
+ # The dependency has been removed from chromium.
+ # This is handled by FindRemovedDeps.
+ continue
+
+ # Check if an update is necessary.
+ if libyuv_deps_entry.revision != new_rev:
+ logging.debug('Roll dependency %s to %s', path, new_rev)
+ result.append(
+ ChangedDep(path, libyuv_deps_entry.url, libyuv_deps_entry.revision,
+ new_rev))
+ return sorted(result)
+
+
+def CalculateChangedClang(new_cr_rev):
+
+ def GetClangRev(lines):
+ for line in lines:
+ match = CLANG_REVISION_RE.match(line)
+ if match:
+ return match.group(1)
+ raise RollError('Could not parse Clang revision!')
+
+ with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f:
+ current_lines = f.readlines()
+ current_rev = GetClangRev(current_lines)
+
+ new_clang_update_py = ReadRemoteCrFile(CLANG_UPDATE_SCRIPT_URL_PATH,
+ new_cr_rev).splitlines()
+ new_rev = GetClangRev(new_clang_update_py)
+ return ChangedDep(CLANG_UPDATE_SCRIPT_LOCAL_PATH, None, current_rev, new_rev)
+
+
+def GenerateCommitMessage(
+ rev_update,
+ current_commit_pos,
+ new_commit_pos,
+ changed_deps_list,
+ added_deps_paths=None,
+ removed_deps_paths=None,
+ clang_change=None,
+):
+ current_cr_rev = rev_update.current_chromium_rev[0:10]
+ new_cr_rev = rev_update.new_chromium_rev[0:10]
+ rev_interval = '%s..%s' % (current_cr_rev, new_cr_rev)
+ git_number_interval = '%s:%s' % (current_commit_pos, new_commit_pos)
+
+ commit_msg = [
+ 'Roll chromium_revision %s (%s)\n' % (rev_interval, git_number_interval),
+ 'Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval),
+ 'Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE % rev_interval)
+ ]
+
+ def Section(adjective, deps):
+ noun = 'dependency' if len(deps) == 1 else 'dependencies'
+ commit_msg.append('%s %s' % (adjective, noun))
+
+ if changed_deps_list:
+ Section('Changed', changed_deps_list)
+
+ for c in changed_deps_list:
+ if isinstance(c, ChangedCipdPackage):
+ commit_msg.append('* %s: %s..%s' %
+ (c.path, c.current_version, c.new_version))
+ elif isinstance(c, ChangedVersionEntry):
+      commit_msg.append('* %s_version: %s..%s' %
+ (c.path, c.current_version, c.new_version))
+ else:
+ commit_msg.append('* %s: %s/+log/%s..%s' %
+ (c.path, c.url, c.current_rev[0:10], c.new_rev[0:10]))
+
+ if added_deps_paths:
+ Section('Added', added_deps_paths)
+ commit_msg.extend('* %s' % p for p in added_deps_paths)
+
+ if removed_deps_paths:
+ Section('Removed', removed_deps_paths)
+ commit_msg.extend('* %s' % p for p in removed_deps_paths)
+
+ if any([changed_deps_list, added_deps_paths, removed_deps_paths]):
+ change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, 'DEPS')
+ commit_msg.append('DEPS diff: %s\n' % change_url)
+ else:
+ commit_msg.append('No dependencies changed.')
+
+ if clang_change and clang_change.current_rev != clang_change.new_rev:
+ commit_msg.append('Clang version changed %s:%s' %
+ (clang_change.current_rev, clang_change.new_rev))
+ change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval,
+ CLANG_UPDATE_SCRIPT_URL_PATH)
+ commit_msg.append('Details: %s\n' % change_url)
+ else:
+ commit_msg.append('No update to Clang.\n')
+
+ commit_msg.append('BUG=None')
+ return '\n'.join(commit_msg)
+
+
+def UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content):
+ """Update the DEPS file with the new revision."""
+
+ with open(deps_filename, 'rb') as deps_file:
+ deps_content = deps_file.read().decode('utf-8')
+
+ # Update the chromium_revision variable.
+ deps_content = deps_content.replace(rev_update.current_chromium_rev,
+ rev_update.new_chromium_rev)
+
+  # Add and remove dependencies. For now: only generated android deps.
+  # Since gclient cannot add or remove deps, we rely on the fact that
+  # these android deps are located in one place that we can copy/paste.
+ deps_re = re.compile(ANDROID_DEPS_START + '.*' + ANDROID_DEPS_END, re.DOTALL)
+ new_deps = deps_re.search(new_cr_content)
+ old_deps = deps_re.search(deps_content)
+ if not new_deps or not old_deps:
+ faulty = 'Chromium' if not new_deps else 'LibYUV'
+    raise RollError('Expected to find "%s" and "%s"\n'
+                    'in the %s DEPS file.' %
+                    (ANDROID_DEPS_START, ANDROID_DEPS_END, faulty))
+ deps_content = deps_re.sub(new_deps.group(0), deps_content)
+
+ for dep in changed_deps:
+ if isinstance(dep, ChangedVersionEntry):
+ deps_content = deps_content.replace(dep.current_version, dep.new_version)
+
+ with open(deps_filename, 'wb') as deps_file:
+ deps_file.write(deps_content.encode('utf-8'))
+
+ # Update each individual DEPS entry.
+ for dep in changed_deps:
+    # ChangedVersionEntry types have already been processed.
+ if isinstance(dep, ChangedVersionEntry):
+ continue
+ local_dep_dir = os.path.join(CHECKOUT_ROOT_DIR, dep.path)
+ if not os.path.isdir(local_dep_dir):
+ raise RollError(
+ 'Cannot find local directory %s. Either run\n'
+ 'gclient sync --deps=all\n'
+ 'or make sure the .gclient file for your solution contains all '
+ 'platforms in the target_os list, i.e.\n'
+ 'target_os = ["android", "unix", "mac", "ios", "win"];\n'
+ 'Then run "gclient sync" again.' % local_dep_dir)
+ if isinstance(dep, ChangedCipdPackage):
+ package = dep.package.format() # Eliminate double curly brackets
+ update = '%s:%s@%s' % (dep.path, package, dep.new_version)
+ else:
+ update = '%s@%s' % (dep.path, dep.new_rev)
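+    # 'gclient setdep' consumes the strings built above:
+    # 'path@git-revision' for plain git deps and 'path:package@version'
+    # for CIPD packages (illustrative; real values come from the DEPS file).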
+ _RunCommand(['gclient', 'setdep', '--revision', update],
+ working_dir=CHECKOUT_SRC_DIR)
+
+
+def _IsTreeClean():
+ stdout, _ = _RunCommand(['git', 'status', '--porcelain'])
+ if len(stdout) == 0:
+ return True
+
+ logging.error('Dirty/unversioned files:\n%s', stdout)
+ return False
+
+
+def _EnsureUpdatedMainBranch(dry_run):
+ current_branch = _RunCommand(['git', 'rev-parse', '--abbrev-ref',
+ 'HEAD'])[0].splitlines()[0]
+ if current_branch != 'main':
+ logging.error('Please checkout the main branch and re-run this script.')
+ if not dry_run:
+ sys.exit(-1)
+
+ logging.info('Updating main branch...')
+ _RunCommand(['git', 'pull'])
+
+
+def _CreateRollBranch(dry_run):
+ logging.info('Creating roll branch: %s', ROLL_BRANCH_NAME)
+ if not dry_run:
+ _RunCommand(['git', 'checkout', '-b', ROLL_BRANCH_NAME])
+
+
+def _RemovePreviousRollBranch(dry_run):
+ active_branch, branches = _GetBranches()
+ if active_branch == ROLL_BRANCH_NAME:
+ active_branch = 'main'
+ if ROLL_BRANCH_NAME in branches:
+ logging.info('Removing previous roll branch (%s)', ROLL_BRANCH_NAME)
+ if not dry_run:
+ _RunCommand(['git', 'checkout', active_branch])
+ _RunCommand(['git', 'branch', '-D', ROLL_BRANCH_NAME])
+
+
+def _LocalCommit(commit_msg, dry_run):
+ logging.info('Committing changes locally.')
+ if not dry_run:
+ _RunCommand(['git', 'add', '--update', '.'])
+ _RunCommand(['git', 'commit', '-m', commit_msg])
+
+
+def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos):
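+  """Selects how the CL is sent to the commit queue.
+
+  Returns 0 (skip CQ) when --skip-cq is set, 1 (CQ dry run) when the
+  roll spans fewer than cq_over commits, and 2 (submit to CQ) otherwise.
+  """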
+ if skip_cq:
+ return 0
+ if (new_commit_pos - current_commit_pos) < cq_over:
+ return 1
+ return 2
+
+
+def _GetCcRecipients(changed_deps_list):
+  """Returns a list of emails to notify based on the changed deps list.
+  """
+  cc_recipients = []
+  # No per-dependency notification rules exist for libyuv yet; add
+  # path-based checks on changed_deps_list entries here when needed.
+  return cc_recipients
+
+
+def _UploadCL(commit_queue_mode, add_cc=None):
+ """Upload the committed changes as a changelist to Gerrit.
+
+ commit_queue_mode:
+ - 2: Submit to commit queue.
+ - 1: Run trybots but do not submit to CQ.
+ - 0: Skip CQ, upload only.
+
+ add_cc: A list of email addresses to add as CC recipients.
+ """
+ cc_recipients = []
+ if add_cc:
+ cc_recipients.extend(add_cc)
+ cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks']
+ if commit_queue_mode >= 2:
+ logging.info('Sending the CL to the CQ...')
+ cmd.extend(['-o', 'label=Bot-Commit+1'])
+ cmd.extend(['-o', 'label=Commit-Queue+2'])
+ cmd.extend(['--send-mail', '--cc', ','.join(cc_recipients)])
+ elif commit_queue_mode >= 1:
+ logging.info('Starting CQ dry run...')
+ cmd.extend(['-o', 'label=Commit-Queue+1'])
+ extra_env = {
+ 'EDITOR': 'true',
+ 'SKIP_GCE_AUTH_FOR_GIT': '1',
+ }
+ stdout, stderr = _RunCommand(cmd, extra_env=extra_env)
+ logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s',
+ stdout, stderr)
+
+
+def GetRollRevisionRanges(opts, libyuv_deps):
+ current_cr_rev = libyuv_deps['vars']['chromium_revision']
+ new_cr_rev = opts.revision
+ if not new_cr_rev:
+ stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD'])
+ head_rev = stdout.strip().split('\t')[0]
+ logging.info('No revision specified. Using HEAD: %s', head_rev)
+ new_cr_rev = head_rev
+
+ return ChromiumRevisionUpdate(current_cr_rev, new_cr_rev)
+
+
+def main():
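+  """Rolls chromium_revision in the libyuv DEPS file and uploads a CL."""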
+ p = argparse.ArgumentParser()
+ p.add_argument('--clean',
+ action='store_true',
+ default=False,
+ help='Removes any previous local roll branch.')
+ p.add_argument('-r',
+ '--revision',
+ help=('Chromium Git revision to roll to. Defaults to the '
+ 'Chromium HEAD revision if omitted.'))
+ p.add_argument('--dry-run',
+ action='store_true',
+ default=False,
+                 help=('Calculate changes and modify DEPS, but don\'t create '
+                       'any local branch, commit, upload a CL, or send any '
+                       'tryjobs.'))
+ p.add_argument('-i',
+ '--ignore-unclean-workdir',
+ action='store_true',
+ default=False,
+ help=('Ignore if the current branch is not main or if there '
+ 'are uncommitted changes (default: %(default)s).'))
+ grp = p.add_mutually_exclusive_group()
+ grp.add_argument('--skip-cq',
+ action='store_true',
+ default=False,
+ help='Skip sending the CL to the CQ (default: %(default)s)')
+ grp.add_argument('--cq-over',
+ type=int,
+ default=1,
+ help=('Commit queue dry run if the revision difference '
+ 'is below this number (default: %(default)s)'))
+ p.add_argument('-v',
+ '--verbose',
+ action='store_true',
+ default=False,
+ help='Be extra verbose in printing of log messages.')
+ opts = p.parse_args()
+
+ if opts.verbose:
+ logging.basicConfig(level=logging.DEBUG)
+ else:
+ logging.basicConfig(level=logging.INFO)
+
+ if not opts.ignore_unclean_workdir and not _IsTreeClean():
+ logging.error('Please clean your local checkout first.')
+ return 1
+
+ if opts.clean:
+ _RemovePreviousRollBranch(opts.dry_run)
+
+ if not opts.ignore_unclean_workdir:
+ _EnsureUpdatedMainBranch(opts.dry_run)
+
+ deps_filename = os.path.join(CHECKOUT_SRC_DIR, 'DEPS')
+ libyuv_deps = ParseLocalDepsFile(deps_filename)
+
+ rev_update = GetRollRevisionRanges(opts, libyuv_deps)
+
+ current_commit_pos = ParseCommitPosition(
+ ReadRemoteCrCommit(rev_update.current_chromium_rev))
+ new_commit_pos = ParseCommitPosition(
+ ReadRemoteCrCommit(rev_update.new_chromium_rev))
+
+ new_cr_content = ReadRemoteCrFile('DEPS', rev_update.new_chromium_rev)
+ new_cr_deps = ParseDepsDict(new_cr_content)
+ changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps)
+ # Discard other deps, assumed to be chromium-only dependencies.
+ new_generated_android_deps, _ = FindAddedDeps(libyuv_deps, new_cr_deps)
+ removed_generated_android_deps, other_deps = FindRemovedDeps(
+ libyuv_deps, new_cr_deps)
+ if other_deps:
+ raise RollError('LibYUV DEPS entries are missing from Chromium: %s.\n'
+ 'Remove them or add them to either '
+ 'LIBYUV_ONLY_DEPS or DONT_AUTOROLL_THESE.' % other_deps)
+ clang_change = CalculateChangedClang(rev_update.new_chromium_rev)
+ commit_msg = GenerateCommitMessage(
+ rev_update,
+ current_commit_pos,
+ new_commit_pos,
+ changed_deps,
+ added_deps_paths=new_generated_android_deps,
+ removed_deps_paths=removed_generated_android_deps,
+ clang_change=clang_change)
+ logging.debug('Commit message:\n%s', commit_msg)
+
+ _CreateRollBranch(opts.dry_run)
+ if not opts.dry_run:
+ UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content)
+ if _IsTreeClean():
+ logging.info("No DEPS changes detected, skipping CL creation.")
+ else:
+ _LocalCommit(commit_msg, opts.dry_run)
+ commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over,
+ current_commit_pos, new_commit_pos)
+ logging.info('Uploading CL...')
+ if not opts.dry_run:
+ _UploadCL(commit_queue_mode, _GetCcRecipients(changed_deps))
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py b/tools_libyuv/autoroller/unittests/roll_deps_test.py
index af86bdd5..af86bdd5 100755
--- a/files/tools_libyuv/autoroller/unittests/roll_deps_test.py
+++ b/tools_libyuv/autoroller/unittests/roll_deps_test.py
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS b/tools_libyuv/autoroller/unittests/testdata/DEPS
index 4f45860c..4f45860c 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
index d53083ce..d53083ce 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.new
diff --git a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
index dd6ddaec..dd6ddaec 100644
--- a/files/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
+++ b/tools_libyuv/autoroller/unittests/testdata/DEPS.chromium.old
diff --git a/files/tools_libyuv/get_landmines.py b/tools_libyuv/get_landmines.py
index 8b33483e..8b33483e 100755
--- a/files/tools_libyuv/get_landmines.py
+++ b/tools_libyuv/get_landmines.py
diff --git a/tools_libyuv/msan/OWNERS b/tools_libyuv/msan/OWNERS
new file mode 100644
index 00000000..9b67a8f6
--- /dev/null
+++ b/tools_libyuv/msan/OWNERS
@@ -0,0 +1,3 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
diff --git a/files/tools_libyuv/msan/blacklist.txt b/tools_libyuv/msan/blacklist.txt
index 8b5e42a7..8b5e42a7 100644
--- a/files/tools_libyuv/msan/blacklist.txt
+++ b/tools_libyuv/msan/blacklist.txt
diff --git a/tools_libyuv/ubsan/OWNERS b/tools_libyuv/ubsan/OWNERS
new file mode 100644
index 00000000..9b67a8f6
--- /dev/null
+++ b/tools_libyuv/ubsan/OWNERS
@@ -0,0 +1,3 @@
+mbonadei@chromium.org
+fbarchard@chromium.org
+pbos@chromium.org
diff --git a/files/tools_libyuv/ubsan/blacklist.txt b/tools_libyuv/ubsan/blacklist.txt
index 8bcb2907..8bcb2907 100644
--- a/files/tools_libyuv/ubsan/blacklist.txt
+++ b/tools_libyuv/ubsan/blacklist.txt
diff --git a/files/tools_libyuv/ubsan/vptr_blacklist.txt b/tools_libyuv/ubsan/vptr_blacklist.txt
index 23cfca53..23cfca53 100644
--- a/files/tools_libyuv/ubsan/vptr_blacklist.txt
+++ b/tools_libyuv/ubsan/vptr_blacklist.txt
diff --git a/files/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc
index 9aaa2dcd..9aaa2dcd 100644
--- a/files/unit_test/basictypes_test.cc
+++ b/unit_test/basictypes_test.cc
diff --git a/files/unit_test/color_test.cc b/unit_test/color_test.cc
index 01267ff1..01267ff1 100644
--- a/files/unit_test/color_test.cc
+++ b/unit_test/color_test.cc
diff --git a/files/unit_test/compare_test.cc b/unit_test/compare_test.cc
index c29562cb..c29562cb 100644
--- a/files/unit_test/compare_test.cc
+++ b/unit_test/compare_test.cc
diff --git a/files/unit_test/convert_test.cc b/unit_test/convert_argb_test.cc
index 1f975825..aeee8a7f 100644
--- a/files/unit_test/convert_test.cc
+++ b/unit_test/convert_argb_test.cc
@@ -1,5 +1,5 @@
/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ * Copyright 2023 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -31,6 +31,13 @@
#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
#endif
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
// Some functions fail on big endian. Enable these tests on all cpus except
// PowerPC, but they are not optimized so disabled by default.
#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
@@ -48,500 +55,15 @@ namespace libyuv {
#define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
+// Subsampled plane dimensions are computed with a rounding-up divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
-// Planar test
-
-#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- SRC_DEPTH) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
- static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
- static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
- static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
- static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
- static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
- const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
- const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
- const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
- align_buffer_page_end(src_u, \
- kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
- align_buffer_page_end(src_v, \
- kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
- MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
- MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
- SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
- SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
- SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
- } \
- for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
- src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
- src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
- } \
- memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
- memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
- memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
- reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
- reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
- reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
- NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
- reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
- reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
- reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
- NEG kHeight); \
- } \
- for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
- EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
- } \
- for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
- EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
- EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_u_c); \
- free_aligned_buffer_page_end(dst_v_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_u_opt); \
- free_aligned_buffer_page_end(dst_v_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_u); \
- free_aligned_buffer_page_end(src_v); \
- }
-
-#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
- TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
- TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \
- TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
- TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0, SRC_DEPTH)
-
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
-TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
-TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
-TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
-TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
-TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
-TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
-TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
-TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
-
-// Test Android 420 to I420
-#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSizeUV = \
- SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_uv, \
- kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
- SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- uint8_t* src_u = src_uv + OFF_U; \
- uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
- int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kWidth; ++j) \
- src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
- for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
- src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
- (fastrand() & 0xff); \
- src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
- (fastrand() & 0xff); \
- } \
- } \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_u_c, 2, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_c, 3, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_u_opt, 102, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_v_opt, 103, \
- SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \
- kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
- SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
- src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \
- dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
- dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
- } \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
- dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
- EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
- dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_u_c); \
- free_aligned_buffer_page_end(dst_v_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_u_opt); \
- free_aligned_buffer_page_end(dst_v_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_uv); \
- }
-
-#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
- SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
- SUBSAMP_Y) \
- TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
- _Any, +, 0, PN, OFF_U, OFF_V) \
- TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
- _Unaligned, +, 2, PN, OFF_U, OFF_V) \
- TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
- -, 0, PN, OFF_U, OFF_V) \
- TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
- FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
- 0, PN, OFF_U, OFF_V)
-
-TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
-TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
-TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
-#undef TESTAPLANARTOP
-#undef TESTAPLANARTOPI
-
-// wrapper to keep API the same
-int I400ToNV21(const uint8_t* src_y,
- int src_stride_y,
- const uint8_t* /* src_u */,
- int /* src_stride_u */,
- const uint8_t* /* src_v */,
- int /* src_stride_v */,
- uint8_t* dst_y,
- int dst_stride_y,
- uint8_t* dst_vu,
- int dst_stride_vu,
- int width,
- int height) {
- return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu,
- dst_stride_vu, width, height);
-}
-
-#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- SRC_DEPTH) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
- static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
- static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
- static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
- static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
- static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
- const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
- const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
- const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
- align_buffer_page_end(src_u, \
- kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
- align_buffer_page_end(src_v, \
- kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_c, \
- kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_opt, \
- kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
- MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
- MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
- MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
- SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
- SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
- SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
- } \
- for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
- src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
- src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
- } \
- memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
- memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
- src_v_p, kSrcHalfWidth, \
- reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_c), \
- kDstHalfWidth * 2, kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
- reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \
- NEG kHeight); \
- } \
- for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
- EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
- } \
- for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \
- EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_u); \
- free_aligned_buffer_page_end(src_v); \
- }
-
-#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
- SRC_DEPTH) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
- TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
-
-TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
-TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
-TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
-TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
-TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
-
-#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
- static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
- static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
- static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
- static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
- static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
- const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
- const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
- const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
- const int kPaddedHeight = \
- (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
- const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
- const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
- align_buffer_page_end( \
- src_uv, \
- 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_c, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_opt, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
- SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
- for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
- src_y_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
- src_uv_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- } \
- if (DOY) { \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- } \
- for (int i = 0; i < kDstHalfHeight; ++i) { \
- for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
- EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
- dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_uv); \
- }
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@@ -621,30 +143,39 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT)
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#else
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+#endif
-TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
+TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@@ -680,8 +211,12 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
kFilterBilinear)
-
-#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
+#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, N, NEG, OFF) \
@@ -746,8 +281,6 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ + 1, _Any, +, 0) \
- TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0)
#endif
@@ -792,8 +325,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
@@ -816,7 +353,9 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
#endif
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
-#else
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
+TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1)
+#else  // ENABLE_FULL_TESTS
TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1)
@@ -832,232 +371,21 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
#endif
-#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, W1280, N, NEG, OFF, ATTEN) \
- TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
- const int kWidth = W1280; \
- const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
- const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
- align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_u, kSizeUV + OFF); \
- align_buffer_page_end(src_v, kSizeUV + OFF); \
- align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
- align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
- align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- src_y[i + OFF] = (fastrand() & 0xff); \
- src_a[i + OFF] = (fastrand() & 0xff); \
- } \
- for (int i = 0; i < kSizeUV; ++i) { \
- src_u[i + OFF] = (fastrand() & 0xff); \
- src_v[i + OFF] = (fastrand() & 0xff); \
- } \
- memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
- memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
- src_v + OFF, kStrideUV, src_a + OFF, kWidth, \
- dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \
- ATTEN); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
- src_v + OFF, kStrideUV, src_a + OFF, kWidth, \
- dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
- ATTEN); \
- } \
- for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
- EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
- } \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_u); \
- free_aligned_buffer_page_end(src_v); \
- free_aligned_buffer_page_end(src_a); \
- free_aligned_buffer_page_end(dst_argb_c); \
- free_aligned_buffer_page_end(dst_argb_opt); \
- }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Invert, -, 0, 0) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Opt, +, 0, 0) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Premult, +, 0, 1)
-#else
-#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN) \
- TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Opt, +, 0, 0)
-#endif
-
-#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
- l, m)
-#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
- l, m)
-#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
- l, m)
-#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
- l, m)
-#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
- l, m)
-
-#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
- &kYuvI601Constants, k, l, m, kFilterBilinear)
-#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
- I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
- &kYuvI601Constants, k, l, m, kFilterBilinear)
-
-#if defined(ENABLE_FULL_TESTS)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#else
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
-TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
-TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
-#endif
-
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
- BPP_B, W1280, N, NEG, OFF) \
+#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@@ -1110,15 +438,21 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Invert, -, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Opt, +, 0)
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Opt, +, 0)
+#endif
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@@ -1139,187 +473,30 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
-#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
-#endif
-
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
+TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
-#endif
-
-#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- W1280, N, NEG, OFF) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
- const int kWidth = W1280; \
- const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
- align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, \
- kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, \
- kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kStride; ++j) \
- src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
- kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \
- kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
- dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
- kStrideUV * 2, kWidth, NEG kHeight); \
- } \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
- for (int j = 0; j < kStrideUV; ++j) { \
- EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_argb); \
- }
-
-#if defined(ENABLE_FULL_TESTS)
-#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
-#else
-#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
+TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
-TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
-TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
-TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
-TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
-TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
-TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
+TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
-TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
-TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
-TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
-TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
-TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
-TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
-TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
-TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
-TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
-TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
-TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
-TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
-
-#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
- SUBSAMP_Y, W1280, N, NEG, OFF) \
- TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
- align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, \
- kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, \
- kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- for (int i = 0; i < kHeight; ++i) \
- for (int j = 0; j < kStride; ++j) \
- src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
- memset(dst_y_c, 1, kWidth* kHeight); \
- memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- memset(dst_y_opt, 101, kWidth* kHeight); \
- memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- MaskCpuFlags(disable_cpu_flags_); \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
- kStrideUV * 2, kWidth, NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
- dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \
- } \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < kStrideUV * 2; ++j) { \
- EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \
- dst_uv_opt[i * kStrideUV * 2 + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_argb); \
- }
-
-#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
-
-TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
-TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@@ -1440,6 +617,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
@@ -1450,7 +628,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
#endif
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
+TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
@@ -1484,6 +662,127 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// In-place test: source and destination share the same buffer.
+#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+ const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+ const int kStrideA = \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ const int kStrideB = \
+ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+ align_buffer_page_end(src_argb, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_c, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
+ src_argb[i + OFF] = (fastrand() & 0xff); \
+ } \
+ memcpy(dst_argb_c + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ } \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_argb); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B) \
+ TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0)
+
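+// For example, TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// generates ABGRToARGB_Inplace: the source is copied into the destination
+// buffer, the conversion runs with src == dst, and the C and optimized
+// results must match.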
+TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+// TODO(fbarchard): Support in place for mirror.
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
+// TODO(fbarchard): Support in place for conversions that increase bpp.
+// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \
@@ -1554,6 +853,7 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
} \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B) \
TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
@@ -1566,6 +866,12 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
HEIGHT_B, benchmark_width_, _Opt, +, 0) \
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B)
+#else
+#define TESTATOBD(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+ HEIGHT_B) \
+ TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
+ HEIGHT_B)
+#endif
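+
+// For example, TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) generates
+// ARGBToRGB565Dither tests: with ENABLE_FULL_TESTS the _Any, _Unaligned,
+// _Invert and _Opt variants run in addition to the TESTATOBDRANDOM
+// comparison; otherwise only the random comparison is kept.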
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
@@ -1634,1081 +940,217 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
-#ifdef HAVE_JPEG
-TEST_F(LibYUVConvertTest, ValidateJpeg) {
- const int kOff = 10;
- const int kMinJpeg = 64;
- const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
- ? benchmark_width_ * benchmark_height_
- : kMinJpeg;
- const int kSize = kImageSize + kOff;
- align_buffer_page_end(orig_pixels, kSize);
-
- // No SOI or EOI. Expect fail.
- memset(orig_pixels, 0, kSize);
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
- // Test special value that matches marker start.
- memset(orig_pixels, 0xff, kSize);
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
- // EOI, SOI. Expect pass.
- orig_pixels[0] = 0xff;
- orig_pixels[1] = 0xd8; // SOI.
- orig_pixels[2] = 0xff;
- orig_pixels[kSize - kOff + 0] = 0xff;
- orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
- for (int times = 0; times < benchmark_iterations_; ++times) {
- EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
- }
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
- const int kOff = 10;
- const int kMinJpeg = 64;
- const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
- ? benchmark_width_ * benchmark_height_
- : kMinJpeg;
- const int kSize = kImageSize + kOff;
- const int kMultiple = 10;
- const int kBufSize = kImageSize * kMultiple + kOff;
- align_buffer_page_end(orig_pixels, kBufSize);
-
- // No SOI or EOI. Expect fail.
- memset(orig_pixels, 0, kBufSize);
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize));
-
- // EOI, SOI. Expect pass.
- orig_pixels[0] = 0xff;
- orig_pixels[1] = 0xd8; // SOI.
- orig_pixels[2] = 0xff;
- orig_pixels[kSize - kOff + 0] = 0xff;
- orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
- for (int times = 0; times < benchmark_iterations_; ++times) {
- EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize));
- }
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, InvalidateJpeg) {
- const int kOff = 10;
- const int kMinJpeg = 64;
- const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
- ? benchmark_width_ * benchmark_height_
- : kMinJpeg;
- const int kSize = kImageSize + kOff;
- align_buffer_page_end(orig_pixels, kSize);
-
- // NULL pointer. Expect fail.
- EXPECT_FALSE(ValidateJpeg(NULL, kSize));
-
- // Negative size. Expect fail.
- EXPECT_FALSE(ValidateJpeg(orig_pixels, -1));
-
- // Too large size. Expect fail.
- EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull));
-
- // No SOI or EOI. Expect fail.
- memset(orig_pixels, 0, kSize);
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
- // SOI but no EOI. Expect fail.
- orig_pixels[0] = 0xff;
- orig_pixels[1] = 0xd8; // SOI.
- orig_pixels[2] = 0xff;
- for (int times = 0; times < benchmark_iterations_; ++times) {
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
- }
-
- // EOI but no SOI. Expect fail.
- orig_pixels[0] = 0;
- orig_pixels[1] = 0;
- orig_pixels[kSize - kOff + 0] = 0xff;
- orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
- EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVConvertTest, FuzzJpeg) {
- // SOI but no EOI. Expect fail.
- for (int times = 0; times < benchmark_iterations_; ++times) {
- const int kSize = fastrand() % 5000 + 3;
- align_buffer_page_end(orig_pixels, kSize);
- MemRandomize(orig_pixels, kSize);
-
- // Add SOI so frame will be scanned.
- orig_pixels[0] = 0xff;
- orig_pixels[1] = 0xd8; // SOI.
- orig_pixels[2] = 0xff;
- orig_pixels[kSize - 1] = 0xff;
- ValidateJpeg(orig_pixels,
- kSize); // Failure normally expected.
- free_aligned_buffer_page_end(orig_pixels);
- }
-}
-
-// Test data created in GIMP. When exporting a JPEG, disable
-// thumbnails etc., choose a subsampling, and use low quality
-// (50) to keep the size small. Generated with xxd -i test.jpg
-// test 0 is J400
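-// For example (file name illustrative), "xxd -i test.jpg" emits
-// "unsigned char test_jpg[] = {...};" and "unsigned int test_jpg_len = N;",
-// renamed here to the kTestNJpg / kTestNJpgLen constants below.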
-static const uint8_t kTest0Jpg[] = {
- 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
- 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
- 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
- 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
- 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
- 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
- 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
- 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10,
- 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01,
- 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01,
- 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4,
- 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
- 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
- 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
- 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
- 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
- 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
- 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
- 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
- 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
- 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
- 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
- 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08,
- 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10,
- 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
- 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
- 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b,
- 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
- 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
- 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
- 0xd9};
-static const size_t kTest0JpgLen = 421;
-
-// test 1 is J444
-static const uint8_t kTest1Jpg[] = {
- 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
- 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
- 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
- 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
- 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
- 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
- 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
- 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
- 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
- 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
- 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
- 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda,
- 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01,
- 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb,
- 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11,
- 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00,
- 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99,
- 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00,
- 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08,
- 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31,
- 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
- 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01,
- 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72,
- 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
- 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00,
- 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2,
- 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c,
- 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61,
- 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21,
- 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01,
- 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48,
- 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01,
- 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff,
- 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
- 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
- 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26,
- 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01,
- 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02,
- 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5,
- 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00,
- 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61,
- 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01,
- 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a,
- 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96,
- 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad,
- 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7,
- 0xd4, 0xff, 0xd9};
-static const size_t kTest1JpgLen = 735;
-
-// test 2 is J420
-static const uint8_t kTest2Jpg[] = {
- 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
- 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
- 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
- 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
- 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
- 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
- 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
- 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
- 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
- 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
- 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff,
- 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff,
- 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
- 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00,
- 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10,
- 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02,
- 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62,
- 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
- 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f,
- 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
- 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
- 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
- 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
- 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
- 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
- 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
- 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
- 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c,
- 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f,
- 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11,
- 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e,
- 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01,
- 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10,
- 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
- 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
- 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b,
- 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
- 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
- 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
- 0xd9};
-static const size_t kTest2JpgLen = 685;
-
-// test 3 is J422
-static const uint8_t kTest3Jpg[] = {
- 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
- 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
- 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
- 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
- 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
- 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
- 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
- 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
- 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
- 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
- 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
- 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff,
- 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
- 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4,
- 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
- 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
- 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
- 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03,
- 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18,
- 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda,
- 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84,
- 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda,
- 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32,
- 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00,
- 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31,
- 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f,
- 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9,
- 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6,
- 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03,
- 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff,
- 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
- 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53,
- 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
- 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08,
- 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca,
- 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04,
- 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff,
- 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9,
- 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5,
- 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c,
- 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00,
- 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
-static const size_t kTest3JpgLen = 704;
-
-// test 4 is J422 vertical - not supported
-static const uint8_t kTest4Jpg[] = {
- 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
- 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
- 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
- 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
- 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
- 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
- 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
- 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
- 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
- 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
- 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
- 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff,
- 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff,
- 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
- 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4,
- 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
- 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
- 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
- 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01,
- 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
- 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08,
- 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff,
- 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02,
- 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01,
- 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9,
- 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01,
- 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0,
- 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e,
- 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde,
- 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a,
- 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
- 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19,
- 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff,
- 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca,
- 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03,
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01,
- 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff,
- 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31,
- 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08,
- 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a,
- 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd,
- 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30,
- 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03,
- 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
-static const size_t kTest4JpgLen = 701;
-
-TEST_F(LibYUVConvertTest, TestMJPGSize) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- printf("test jpeg size %d x %d\n", width, height);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_u, half_width * half_height);
- align_buffer_page_end(dst_v, half_width * half_height);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width,
- dst_v, half_width, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381);
- uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_u_hash, 2501859930u);
- EXPECT_EQ(dst_v_hash, 2126459123u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_u);
- free_aligned_buffer_page_end(dst_v);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- // Convert to NV21
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_vu, half_width * half_height * 2);
-
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Convert to I420
- align_buffer_page_end(dst2_y, width * height);
- align_buffer_page_end(dst2_u, half_width * half_height);
- align_buffer_page_end(dst2_v, half_width * half_height);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
- dst2_v, half_width, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Convert I420 to NV21
- align_buffer_page_end(dst3_y, width * height);
- align_buffer_page_end(dst3_vu, half_width * half_height * 2);
-
- I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
- width, dst3_vu, half_width * 2, width, height);
-
- for (int i = 0; i < width * height; ++i) {
- EXPECT_EQ(dst_y[i], dst3_y[i]);
- }
- for (int i = 0; i < half_width * half_height * 2; ++i) {
- EXPECT_EQ(dst_vu[i], dst3_vu[i]);
- }
-
- free_aligned_buffer_page_end(dst3_y);
- free_aligned_buffer_page_end(dst3_vu);
-
- free_aligned_buffer_page_end(dst2_y);
- free_aligned_buffer_page_end(dst2_u);
- free_aligned_buffer_page_end(dst2_v);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- // Convert to NV12
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
-
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Convert to I420
- align_buffer_page_end(dst2_y, width * height);
- align_buffer_page_end(dst2_u, half_width * half_height);
- align_buffer_page_end(dst2_v, half_width * half_height);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
- dst2_v, half_width, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Convert I420 to NV12
- align_buffer_page_end(dst3_y, width * height);
- align_buffer_page_end(dst3_uv, half_width * half_height * 2);
-
- I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
- width, dst3_uv, half_width * 2, width, height);
-
- for (int i = 0; i < width * height; ++i) {
- EXPECT_EQ(dst_y[i], dst3_y[i]);
- }
- for (int i = 0; i < half_width * half_height * 2; ++i) {
- EXPECT_EQ(dst_uv[i], dst3_uv[i]);
- }
-
- free_aligned_buffer_page_end(dst3_y);
- free_aligned_buffer_page_end(dst3_uv);
-
- free_aligned_buffer_page_end(dst2_y);
- free_aligned_buffer_page_end(dst2_u);
- free_aligned_buffer_page_end(dst2_v);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_uv_hash, 1069662856u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value. Hashes are for VU so flip the plane.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- align_buffer_page_end(dst_vu, half_width * half_height * 2);
- SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
- half_height);
- uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_vu_hash, 1069662856u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
- free_aligned_buffer_page_end(dst_vu);
-}
-
-// TODO(fbarchard): Improve test to compare against I422, not checksum
-TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_uv_hash, 493520167u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value. Hashes are for VU so flip the plane.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- align_buffer_page_end(dst_vu, half_width * half_height * 2);
- SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
- half_height);
- uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_vu_hash, 493520167u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
- free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 330644005u);
- EXPECT_EQ(dst_uv_hash, 135214341u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value. Hashes are for VU so flip the plane.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- align_buffer_page_end(dst_vu, half_width * half_height * 2);
- SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
- half_height);
- uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 330644005u);
- EXPECT_EQ(dst_vu_hash, 135214341u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
- free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_uv_hash, 506143297u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int half_width = (width + 1) / 2;
- int half_height = (height + 1) / 2;
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_y, width * height);
- align_buffer_page_end(dst_uv, half_width * half_height * 2);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
- half_width * 2, width, height, width, height);
- }
- // Expect success
- EXPECT_EQ(0, ret);
-
- // Test result matches known hash value. Hashes are for VU so flip the plane.
- uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
- align_buffer_page_end(dst_vu, half_width * half_height * 2);
- SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
- half_height);
- uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
- EXPECT_EQ(dst_y_hash, 2682851208u);
- EXPECT_EQ(dst_vu_hash, 506143297u);
-
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_uv);
- free_aligned_buffer_page_end(dst_vu);
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
- int width = 0;
- int height = 0;
- int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
- EXPECT_EQ(0, ret);
-
- int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
- benchmark_height_ / (width * height);
-
- align_buffer_page_end(dst_argb, width * height * 4);
- for (int times = 0; times < benchmark_iterations; ++times) {
- ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width,
- height, width, height);
+#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, W1280, N, NEG, OFF, ATTEN) \
+ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
+ align_buffer_page_end(src_u, kSizeUV + OFF); \
+ align_buffer_page_end(src_v, kSizeUV + OFF); \
+ align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
+ align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
+ align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y[i + OFF] = (fastrand() & 0xff); \
+ src_a[i + OFF] = (fastrand() & 0xff); \
+ } \
+ for (int i = 0; i < kSizeUV; ++i) { \
+ src_u[i + OFF] = (fastrand() & 0xff); \
+ src_v[i + OFF] = (fastrand() & 0xff); \
+ } \
+ memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
+ memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
+ src_v + OFF, kStrideUV, src_a + OFF, kWidth, \
+ dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight, \
+ ATTEN); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_u + OFF, kStrideUV, \
+ src_v + OFF, kStrideUV, src_a + OFF, kWidth, \
+ dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, \
+ ATTEN); \
+ } \
+ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
+ EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
+ } \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ free_aligned_buffer_page_end(src_a); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
}
-  // Expect success
- EXPECT_EQ(0, ret);
- // Test result matches known hash value.
- uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381);
-#ifdef LIBYUV_UNLIMITED_DATA
- EXPECT_EQ(dst_argb_hash, 3900633302u);
+#if defined(ENABLE_FULL_TESTS)
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_ + 1, _Any, +, 0, 0) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Unaligned, +, 2, 0) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Invert, -, 0, 0) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Premult, +, 0, 1)
#else
- EXPECT_EQ(dst_argb_hash, 2355976473u);
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN) \
+ TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, 0)
#endif
- free_aligned_buffer_page_end(dst_argb);
-}
-
-static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) {
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
-
- int width = mjpeg_decoder.GetWidth();
- int height = mjpeg_decoder.GetHeight();
-
- // YUV420
- if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 2 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- printf("JPeg is J420, %dx%d %d bytes\n", width, height,
- static_cast<int>(sample_size));
- // YUV422
- } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- printf("JPeg is J422, %dx%d %d bytes\n", width, height,
- static_cast<int>(sample_size));
- // YUV444
- } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- printf("JPeg is J444, %dx%d %d bytes\n", width, height,
- static_cast<int>(sample_size));
- // YUV400
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceGrayscale &&
- mjpeg_decoder.GetNumComponents() == 1 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1) {
- printf("JPeg is J400, %dx%d %d bytes\n", width, height,
- static_cast<int>(sample_size));
- } else {
- // Unknown colorspace.
- printf("JPeg is Unknown colorspace.\n");
- }
- mjpeg_decoder.UnloadFrame();
- return ret;
-}
-
-TEST_F(LibYUVConvertTest, TestMJPGInfo) {
- EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen));
- EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
- EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
- EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
- EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
- kTest4JpgLen)); // Valid but unsupported.
-}
-#endif // HAVE_JPEG
-
-TEST_F(LibYUVConvertTest, NV12Crop) {
- const int SUBSAMP_X = 2;
- const int SUBSAMP_Y = 2;
- const int kWidth = benchmark_width_;
- const int kHeight = benchmark_height_;
- const int crop_y =
- ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
- const int kDestWidth = benchmark_width_;
- const int kDestHeight = benchmark_height_ - crop_y * 2;
- const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
- const int sample_size =
- kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
- align_buffer_page_end(src_y, sample_size);
- uint8_t* src_uv = src_y + kWidth * kHeight;
-
- align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
- align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
- align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
- align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
- for (int i = 0; i < kHeight * kWidth; ++i) {
- src_y[i] = (fastrand() & 0xff);
- }
- for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) {
- src_uv[i] = (fastrand() & 0xff);
- }
- memset(dst_y, 1, kDestWidth * kDestHeight);
- memset(dst_u, 2,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- memset(dst_v, 3,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- memset(dst_y_2, 1, kDestWidth * kDestHeight);
- memset(dst_u_2, 2,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- memset(dst_v_2, 3,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
- ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2,
- SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2,
- SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
- kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12);
-
- NV12ToI420(src_y + crop_y * kWidth, kWidth,
- src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y,
- kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
- SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight);
-
- for (int i = 0; i < kDestHeight; ++i) {
- for (int j = 0; j < kDestWidth; ++j) {
- EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]);
- }
- }
- for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
- for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
- EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
- dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
- }
- }
- for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
- for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
- EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
- dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
- }
- }
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_u);
- free_aligned_buffer_page_end(dst_v);
- free_aligned_buffer_page_end(dst_y_2);
- free_aligned_buffer_page_end(dst_u_2);
- free_aligned_buffer_page_end(dst_v_2);
- free_aligned_buffer_page_end(src_y);
-}
-
-TEST_F(LibYUVConvertTest, I420CropOddY) {
- const int SUBSAMP_X = 2;
- const int SUBSAMP_Y = 2;
- const int kWidth = benchmark_width_;
- const int kHeight = benchmark_height_;
- const int crop_y = benchmark_height_ > 1 ? 1 : 0;
- const int kDestWidth = benchmark_width_;
- const int kDestHeight = benchmark_height_ - crop_y * 2;
- const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X);
- const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X);
- const int sample_size = kWidth * kHeight +
- kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) +
- kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y);
- align_buffer_page_end(src_y, sample_size);
- uint8_t* src_u = src_y + kWidth * kHeight;
- uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y);
-
- align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
- align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
- SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
- for (int i = 0; i < kHeight * kWidth; ++i) {
- src_y[i] = (fastrand() & 0xff);
- }
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) {
- src_u[i] = (fastrand() & 0xff);
- }
- for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) {
- src_v[i] = (fastrand() & 0xff);
- }
- memset(dst_y, 1, kDestWidth * kDestHeight);
- memset(dst_u, 2,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- memset(dst_v, 3,
- SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-
- MaskCpuFlags(benchmark_cpu_info_);
- for (int i = 0; i < benchmark_iterations_; ++i) {
- ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u,
- SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
- SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
- kDestWidth, kDestHeight, libyuv::kRotate0,
- libyuv::FOURCC_I420);
- }
+#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define J422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define J444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define J444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
+ l, m)
+#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \
+ l, m)
+#define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define H444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
+ l, m)
+#define U444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define U444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
+ l, m)
+#define V444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
+#define V444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvV2020Constants, k, \
+ l, m)
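+
+// Naming note: the prefix letter selects the YUV matrix wrapped above:
+// J = full-range BT.601 (JPEG), H = BT.709, F = full-range BT.709,
+// U = BT.2020, and V = full-range BT.2020 constants.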
- for (int i = 0; i < kDestHeight; ++i) {
- for (int j = 0; j < kDestWidth; ++j) {
- EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j],
- dst_y[i * kDestWidth + j]);
- }
- }
- for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
- for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
- EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j],
- dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
- }
- }
- for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
- for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
- EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j],
- dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
- }
- }
+#define I420AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I420AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
+#define I422AlphaToARGBFilter(a, b, c, d, e, f, g, h, i, j, k, l, m) \
+ I422AlphaToARGBMatrixFilter(a, b, c, d, e, f, g, h, i, j, \
+ &kYuvI601Constants, k, l, m, kFilterBilinear)
- free_aligned_buffer_page_end(dst_y);
- free_aligned_buffer_page_end(dst_u);
- free_aligned_buffer_page_end(dst_v);
- free_aligned_buffer_page_end(src_y);
-}
+#if defined(ENABLE_FULL_TESTS)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V420Alpha, 2, 2, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V422Alpha, 2, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(J444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(H444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(F444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(U444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(V444Alpha, 1, 1, ABGR, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#else
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I444Alpha, 1, 1, ARGB, 4, 4, 1)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
+TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
+#endif
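+
+// The suffixes name the variant each expansion exercises: _Any uses an odd
+// width (benchmark_width_ + 1), _Unaligned offsets the source buffers,
+// _Invert passes a negative height to flip vertically, _Opt runs the
+// optimized path at the benchmark width, and _Premult sets the attenuate
+// flag.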
TEST_F(LibYUVConvertTest, TestYToARGB) {
uint8_t y[32];
@@ -2846,6 +1288,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
free_aligned_buffer_page_end(dst_argb32_opt); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, FMT_C, BPP_C) \
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
@@ -2856,71 +1299,17 @@ TEST_F(LibYUVConvertTest, TestDither) {
YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, FMT_C, BPP_C) \
+ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
+#endif
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
#endif
-#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
- TEST_F(LibYUVConvertTest, NAME) { \
- const int kWidth = benchmark_width_; \
- const int kHeight = benchmark_height_; \
- \
- align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
- align_buffer_page_end(orig_y, kWidth* kHeight); \
- align_buffer_page_end(orig_u, \
- SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
- align_buffer_page_end(orig_v, \
- SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
- \
- align_buffer_page_end(dst_y_orig, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_orig, \
- 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
- \
- align_buffer_page_end(dst_y, kWidth* kHeight); \
- align_buffer_page_end(dst_uv, \
- 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
- \
- MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
- \
- /* Convert UYVY to NV12 in 2 steps for reference */ \
- libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \
- orig_u, SUBSAMPLE(kWidth, 2), orig_v, \
- SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
- libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \
- SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \
- 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
- \
- /* Convert to NV12 */ \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \
- dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
- } \
- \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- EXPECT_EQ(orig_y[i], dst_y[i]); \
- } \
- for (int i = 0; i < kWidth * kHeight; ++i) { \
- EXPECT_EQ(dst_y_orig[i], dst_y[i]); \
- } \
- for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \
- ++i) { \
- EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \
- } \
- \
- free_aligned_buffer_page_end(orig_uyvy); \
- free_aligned_buffer_page_end(orig_y); \
- free_aligned_buffer_page_end(orig_u); \
- free_aligned_buffer_page_end(orig_v); \
- free_aligned_buffer_page_end(dst_y_orig); \
- free_aligned_buffer_page_end(dst_uv_orig); \
- free_aligned_buffer_page_end(dst_y); \
- free_aligned_buffer_page_end(dst_uv); \
- }
-
-TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
-TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
-
// Transitive test: A to B to C is the same as A to C.
// Benchmarks A to B to C for comparison to the 1-step conversion, benchmarked
// elsewhere.
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
@@ -3223,6 +1612,7 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
free_aligned_buffer_page_end(dst_argb_bc); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, \
benchmark_width_ + 1, _Any, +, 0, FMT_C, BPP_C) \
@@ -3232,6 +1622,11 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
_Invert, -, 0, FMT_C, BPP_C) \
TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \
_Opt, +, 0, FMT_C, BPP_C)
+#else
+#define TESTPLANETOE(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, FMT_C, BPP_C) \
+ TESTPLANETOEI(FMT_A, SUB_A, BPP_A, FMT_B, SUB_B, BPP_B, benchmark_width_, \
+ _Opt, +, 0, FMT_C, BPP_C)
+#endif
// Caveat: Destination needs to be 4 bytes
#ifdef LITTLE_ENDIAN_ONLY_TEST
@@ -3348,11 +1743,15 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
}
#endif // HAS_ABGRTOAR30ROW_AVX2
+#if !defined(LEAN_TESTS)
+
// Provide matrix wrappers for 12 bit YUV
#define I012ToARGB(a, b, c, d, e, f, g, h, i, j) \
I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
@@ -3440,6 +1839,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
free_aligned_buffer_page_end(dst_argb_opt); \
}
+#if defined(ENABLE_FULL_TESTS)
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
BPP_B, ALIGN, YALIGN) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
@@ -3450,6 +1850,12 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
ALIGN, YALIGN, benchmark_width_, _Invert, -, 0, 0) \
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#else
+#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, \
+ BPP_B, ALIGN, YALIGN) \
+ TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_MASK, FMT_B, BPP_B, \
+ ALIGN, YALIGN, benchmark_width_, _Opt, +, 0, 0)
+#endif
// These conversions are only optimized for x86
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
@@ -3495,6 +1901,7 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
#endif // LITTLE_ENDIAN_ONLY_TEST
@@ -3733,8 +2140,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
-#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
+#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@@ -3777,16 +2184,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
-#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#else
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#endif
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@@ -3829,23 +2243,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS
@@ -4281,61 +2695,6 @@ TEST_F(LibYUVConvertTest, Test565) {
uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
EXPECT_EQ(610919429u, checksum);
}
-
-// Test RGB24 to J420 is exact
-#if defined(LIBYUV_BIT_EXACT)
-TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
- const int kSize = 256;
- align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
- align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
- int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
- (kSize * 2) * benchmark_iterations_;
-
- for (int i = 0; i < kSize * 3 * 2; ++i) {
- orig_rgb24[i] = i;
- }
-
- for (int i = 0; i < iterations256; ++i) {
- RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane
- dest_j420 + kSize * 2, kSize / 2, // U plane
- dest_j420 + kSize * 5 / 2, kSize / 2, // V plane
- kSize, 2);
- }
-
- uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
- EXPECT_EQ(2755440272u, checksum);
-
- free_aligned_buffer_page_end(orig_rgb24);
- free_aligned_buffer_page_end(dest_j420);
-}
-#endif
-
-// Test RGB24 to I420 is exact
-#if defined(LIBYUV_BIT_EXACT)
-TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
- const int kSize = 256;
- align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
- align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2);
- int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
- (kSize * 2) * benchmark_iterations_;
-
- for (int i = 0; i < kSize * 3 * 2; ++i) {
- orig_rgb24[i] = i;
- }
-
- for (int i = 0; i < iterations256; ++i) {
- RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane
- dest_i420 + kSize * 2, kSize / 2, // U plane
- dest_i420 + kSize * 5 / 2, kSize / 2, // V plane
- kSize, 2);
- }
-
- uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
- EXPECT_EQ(1526656597u, checksum);
-
- free_aligned_buffer_page_end(orig_rgb24);
- free_aligned_buffer_page_end(dest_i420);
-}
-#endif
+#endif // !defined(LEAN_TESTS)
} // namespace libyuv
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
new file mode 100644
index 00000000..f55bace3
--- /dev/null
+++ b/unit_test/convert_test.cc
@@ -0,0 +1,2110 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "libyuv/basic_types.h"
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "../unit_test/unit_test.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/video_common.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
+#endif
+
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
+// Some functions fail on big endian. Enable these tests on all CPUs except
+// PowerPC; they are not optimized, so they are disabled by default.
+#if !defined(DISABLE_SLOW_TESTS) && !defined(__powerpc__)
+#define LITTLE_ENDIAN_ONLY_TEST 1
+#endif
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that run unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+namespace libyuv {
+
+// Aliases to copy pixels as-is
+#define AR30ToAR30 ARGBCopy
+#define ABGRToABGR ARGBCopy
+
+// Subsample amount uses a rounding-up divide.
+#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
+
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
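+
+// A minimal sanity sketch of the two helpers above (illustrative asserts,
+// not part of the original coverage): SUBSAMPLE is a rounding-up divide and
+// ALIGNINT rounds up to a multiple of ALIGN.
+static_assert(SUBSAMPLE(5, 2) == 3, "chroma dimension of 5 rounds up to 3");
+static_assert(ALIGNINT(5, 4) == 8, "5 aligned up to a multiple of 4 is 8");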
+
+// Planar test
+
+#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_u, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_v, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
+ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
+ SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
+ src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
+ } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
+ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_u_c); \
+ free_aligned_buffer_page_end(dst_v_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_u_opt); \
+ free_aligned_buffer_page_end(dst_v_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ }
+
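+// Pattern used by the generated tests in this file: run the conversion once
+// with SIMD masked off (MaskCpuFlags(disable_cpu_flags_)) to produce a
+// reference C result, run it benchmark_iterations_ times under the
+// benchmarked CPU flags, then require the two outputs to match byte for
+// byte.
+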
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
+ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, SRC_DEPTH) \
+ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
+ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif
+
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I420, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I420Mirror, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I422, uint8_t, 1, 2, 1, 8)
+TESTPLANARTOP(I422, uint8_t, 1, 2, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I444, uint8_t, 1, 1, 1, I444, uint8_t, 1, 1, 1, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I420, uint8_t, 1, 2, 2, I012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(H010, uint16_t, 2, 2, 2, H420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H012, uint16_t, 2, 2, 2, 8)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I410, uint16_t, 2, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I412, uint16_t, 2, 1, 1, 12)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
+TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
+
+// Test Android 420 to I420
+#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF, PN, OFF_U, OFF_V) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##To##PN##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSizeUV = \
+ SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
+ align_buffer_page_end(src_uv, \
+ kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ uint8_t* src_u = src_uv + OFF_U; \
+ uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
+ int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kWidth; ++j) \
+ src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+ src_u[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
+ (fastrand() & 0xff); \
+ src_v[(i * src_stride_uv) + j * PIXEL_STRIDE + OFF] = \
+ (fastrand() & 0xff); \
+ } \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight); \
+ memset(dst_u_c, 2, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_v_c, 3, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_y_opt, 101, kWidth* kHeight); \
+ memset(dst_u_opt, 102, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_v_opt, 103, \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, dst_y_c, \
+ kWidth, dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
+ SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), PIXEL_STRIDE, \
+ dst_y_opt, kWidth, dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+ dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+ EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+ EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
+ dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_u_c); \
+ free_aligned_buffer_page_end(dst_v_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_u_opt); \
+ free_aligned_buffer_page_end(dst_v_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
+ SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_ + 1, \
+ _Any, +, 0, PN, OFF_U, OFF_V) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, \
+ _Unaligned, +, 2, PN, OFF_U, OFF_V) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, \
+ -, 0, PN, OFF_U, OFF_V) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+ 0, PN, OFF_U, OFF_V)
+#else
+#define TESTAPLANARTOP(SRC_FMT_PLANAR, PN, PIXEL_STRIDE, OFF_U, OFF_V, \
+ SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y) \
+ TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, \
+ 0, PN, OFF_U, OFF_V)
+#endif
+
+TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
+TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
+TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
+#undef TESTAPLANARTOP
+#undef TESTAPLANARTOPI
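+
+// The three Android420 cases above cover the YUV_420_888 layouts: a UV
+// pixel stride of 1 with zero offsets is fully planar (I420-like), while a
+// stride of 2 with a one-byte offset on V or U selects interleaved NV12 or
+// NV21 respectively.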
+
+// Wrapper to keep the API the same as the other converters
+int I400ToNV21(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* /* src_u */,
+ int /* src_stride_u */,
+ const uint8_t* /* src_v */,
+ int /* src_stride_v */,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height) {
+ return I400ToNV21(src_y, src_stride_y, dst_y, dst_stride_y, dst_vu,
+ dst_stride_vu, width, height);
+}
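+
+// This wrapper lets TESTPLANARTOBP(I400, ...) below expand to the common
+// three-source-plane call shape; the underlying I400ToNV21 takes only the Y
+// plane, so the unused U and V arguments are dropped here.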
+
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
+ SRC_DEPTH) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_u, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(src_v, \
+ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
+ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_u_p = reinterpret_cast<SRC_T*>(src_u + OFF); \
+ SRC_T* src_v_p = reinterpret_cast<SRC_T*>(src_v + OFF); \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ src_y_p[i] = src_y_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight; ++i) { \
+ src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
+ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
+ src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), \
+ kDstHalfWidth * 2, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
+ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), kDstHalfWidth * 2, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
+ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
+ } \
+ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \
+ EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_u); \
+ free_aligned_buffer_page_end(src_v); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
+ SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#else
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
+ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \
+ TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH)
+#endif
+
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I420, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I422, uint8_t, 1, 2, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV12, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I444, uint8_t, 1, 1, 1, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I400, uint8_t, 1, 2, 2, NV21, uint8_t, 1, 2, 2, 8)
+TESTPLANARTOBP(I010, uint16_t, 2, 2, 2, P010, uint16_t, 2, 2, 2, 10)
+TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
+TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
+TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
+
+#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+ const int kPaddedHeight = \
+ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
+ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+ align_buffer_page_end( \
+ src_uv, \
+ 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; \
+ i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \
+ SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ if (DOY) { \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ } \
+ for (int i = 0; i < kDstHalfHeight; ++i) { \
+ for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
+ dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT)
+#else
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT)
+#endif
+
+TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 16, 1, 1)
+TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 16, 1, 1)
+TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
+
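+// Outside the macro machinery, each TESTBPTOBP row boils down to a single
+// library call. A minimal sketch of the call shape for the NV21 -> NV12 row
+// (illustrative only, not part of the test matrix; buffer sizes assume even
+// dimensions):
+static inline void SketchNV21ToNV12(int width, int height) {
+  align_buffer_page_end(src_y, width * height);
+  align_buffer_page_end(src_vu, width * height / 2);
+  align_buffer_page_end(dst_y, width * height);
+  align_buffer_page_end(dst_uv, width * height / 2);
+  // Luma stride is width; the interleaved VU/UV stride is also width bytes.
+  NV21ToNV12(src_y, width, src_vu, width, dst_y, width, dst_uv, width, width,
+             height);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_vu);
+  free_aligned_buffer_page_end(dst_y);
+  free_aligned_buffer_page_end(dst_uv);
+}
+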
+#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+ align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_c, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_opt, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_y_c, 1, kWidth* kHeight); \
+ memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_y_opt, 101, kWidth* kHeight); \
+ memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kStride; ++j) \
+ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
+ kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \
+ kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
+ dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
+ kStrideUV * 2, kWidth, NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
+ for (int j = 0; j < kStrideUV; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_argb); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
+TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
+#endif
+TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
+TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
+
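+// Each TESTATOPLANAR row reduces to a call of this shape. A minimal sketch
+// for the ARGB -> I420 row (illustrative only; strides assume tightly packed
+// planes):
+static inline int SketchARGBToI420(const uint8_t* src_argb,
+                                   uint8_t* dst_y,
+                                   uint8_t* dst_u,
+                                   uint8_t* dst_v,
+                                   int width,
+                                   int height) {
+  // ARGB is 4 bytes per pixel; U and V are 2x2 subsampled half-width planes.
+  return ARGBToI420(src_argb, width * 4, dst_y, width, dst_u, (width + 1) / 2,
+                    dst_v, (width + 1) / 2, width, height);
+}
+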
+#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+ align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
+ align_buffer_page_end(dst_a_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_c, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_a_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_opt, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_c, 1, kWidth* kHeight); \
+ memset(dst_y_c, 2, kWidth* kHeight); \
+ memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_opt, 101, kWidth* kHeight); \
+ memset(dst_y_opt, 102, kWidth* kHeight); \
+ memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kStride; ++j) \
+ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
+ kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \
+ dst_a_c, kWidth, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
+ dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
+ kStrideUV * 2, dst_a_opt, kWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
+ for (int j = 0; j < kStrideUV; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_a_c); \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_a_opt); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_argb); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
+
+#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_c, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_opt, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kStride; ++j) \
+ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
+ memset(dst_y_c, 1, kWidth* kHeight); \
+ memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_y_opt, 101, kWidth* kHeight); \
+ memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
+ kStrideUV * 2, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
+ dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+ for (int j = 0; j < kStrideUV * 2; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \
+ dst_uv_opt[i * kStrideUV * 2 + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_argb); \
+ }
+
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
+TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
+TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
+TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
+
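+// TESTATOBP rows target biplanar (NV-style) outputs, so the U and V
+// destinations collapse into one interleaved plane. A sketch for the
+// ARGB -> NV12 row (illustrative only; assumes tightly packed planes):
+static inline int SketchARGBToNV12(const uint8_t* src_argb,
+                                   uint8_t* dst_y,
+                                   uint8_t* dst_uv,
+                                   int width,
+                                   int height) {
+  // The UV plane is half height; its byte stride covers interleaved U and V.
+  return ARGBToNV12(src_argb, width * 4, dst_y, width, dst_uv,
+                    ((width + 1) / 2) * 2, width, height);
+}
+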
+#if !defined(LEAN_TESTS)
+
+#ifdef HAVE_JPEG
+TEST_F(LibYUVConvertTest, ValidateJpeg) {
+ const int kOff = 10;
+ const int kMinJpeg = 64;
+ const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
+ ? benchmark_width_ * benchmark_height_
+ : kMinJpeg;
+ const int kSize = kImageSize + kOff;
+ align_buffer_page_end(orig_pixels, kSize);
+
+ // No SOI or EOI. Expect fail.
+ memset(orig_pixels, 0, kSize);
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+ // Test special value that matches marker start.
+ memset(orig_pixels, 0xff, kSize);
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+ // EOI, SOI. Expect pass.
+ orig_pixels[0] = 0xff;
+ orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
+ orig_pixels[kSize - kOff + 0] = 0xff;
+ orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
+ for (int times = 0; times < benchmark_iterations_; ++times) {
+ EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
+ }
+ free_aligned_buffer_page_end(orig_pixels);
+}
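+
+// What "valid" means above: ValidateJpeg requires an SOI marker (0xff 0xd8)
+// at the start of the buffer and an EOI marker (0xff 0xd9) somewhere near the
+// end. A simplified sketch of that check (hypothetical helper, not the
+// library's scanner, which also limits how far back it searches):
+static inline bool HasSoiAndEoi(const uint8_t* buf, size_t len) {
+  if (len < 4 || buf[0] != 0xff || buf[1] != 0xd8) {
+    return false;  // Missing Start Of Image marker.
+  }
+  for (size_t i = len - 1; i > 0; --i) {
+    if (buf[i - 1] == 0xff && buf[i] == 0xd9) {
+      return true;  // Found End Of Image marker.
+    }
+  }
+  return false;
+}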
+
+TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
+ const int kOff = 10;
+ const int kMinJpeg = 64;
+ const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
+ ? benchmark_width_ * benchmark_height_
+ : kMinJpeg;
+ const int kSize = kImageSize + kOff;
+ const int kMultiple = 10;
+ const int kBufSize = kImageSize * kMultiple + kOff;
+ align_buffer_page_end(orig_pixels, kBufSize);
+
+ // No SOI or EOI. Expect fail.
+ memset(orig_pixels, 0, kBufSize);
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize));
+
+ // EOI, SOI. Expect pass.
+ orig_pixels[0] = 0xff;
+ orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
+ orig_pixels[kSize - kOff + 0] = 0xff;
+ orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
+ for (int times = 0; times < benchmark_iterations_; ++times) {
+ EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize));
+ }
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVConvertTest, InvalidateJpeg) {
+ const int kOff = 10;
+ const int kMinJpeg = 64;
+ const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg
+ ? benchmark_width_ * benchmark_height_
+ : kMinJpeg;
+ const int kSize = kImageSize + kOff;
+ align_buffer_page_end(orig_pixels, kSize);
+
+ // NULL pointer. Expect fail.
+ EXPECT_FALSE(ValidateJpeg(NULL, kSize));
+
+ // Negative size. Expect fail.
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, -1));
+
+ // Too large size. Expect fail.
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull));
+
+ // No SOI or EOI. Expect fail.
+ memset(orig_pixels, 0, kSize);
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+ // SOI but no EOI. Expect fail.
+ orig_pixels[0] = 0xff;
+ orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
+ for (int times = 0; times < benchmark_iterations_; ++times) {
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+ }
+
+ // EOI but no SOI. Expect fail.
+ orig_pixels[0] = 0;
+ orig_pixels[1] = 0;
+ orig_pixels[kSize - kOff + 0] = 0xff;
+ orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
+ EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVConvertTest, FuzzJpeg) {
+ // SOI but no EOI. Expect fail.
+ for (int times = 0; times < benchmark_iterations_; ++times) {
+ const int kSize = fastrand() % 5000 + 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ MemRandomize(orig_pixels, kSize);
+
+ // Add SOI so frame will be scanned.
+ orig_pixels[0] = 0xff;
+ orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
+ orig_pixels[kSize - 1] = 0xff;
+ ValidateJpeg(orig_pixels,
+ kSize); // Failure normally expected.
+ free_aligned_buffer_page_end(orig_pixels);
+ }
+}
+
+// Test data created in GIMP. In the JPEG export dialog, disable
+// thumbnails etc., choose a subsampling mode, and use low quality
+// (50) to keep the size small. Converted to a C array with: xxd -i test.jpg
+// test 0 is J400
+static const uint8_t kTest0Jpg[] = {
+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xc2, 0x00, 0x0b, 0x08, 0x00, 0x10,
+ 0x00, 0x20, 0x01, 0x01, 0x11, 0x00, 0xff, 0xc4, 0x00, 0x17, 0x00, 0x01,
+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xda, 0x00, 0x08, 0x01,
+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x43, 0x7e, 0xa7, 0x97, 0x57, 0xff, 0xc4,
+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+ 0x02, 0x3b, 0xc0, 0x6f, 0x66, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
+ 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
+ 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
+ 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+ 0x3f, 0x21, 0x65, 0x6e, 0x31, 0x86, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
+ 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
+ 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x08,
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x10, 0x35, 0xff, 0xc4, 0x00, 0x1f, 0x10,
+ 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
+ 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+ 0x3f, 0x10, 0x0b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x88, 0xab, 0x8b,
+ 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
+ 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
+ 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
+ 0xd9};
+static const size_t kTest0JpgLen = 421;
+
+// test 1 is J444
+static const uint8_t kTest1Jpg[] = {
+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+ 0x01, 0x11, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+ 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
+ 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x03, 0xff, 0xda,
+ 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00, 0x01,
+ 0x40, 0x8f, 0x26, 0xe8, 0xf4, 0xcc, 0xf9, 0x69, 0x2b, 0x1b, 0x2a, 0xcb,
+ 0xff, 0xc4, 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11,
+ 0x00, 0x03, 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00,
+ 0x01, 0x05, 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99,
+ 0x0d, 0x26, 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x01, 0x00,
+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x01, 0x00, 0x10, 0x11, 0x02, 0x12, 0xff, 0xda, 0x00, 0x08,
+ 0x01, 0x03, 0x01, 0x01, 0x3f, 0x01, 0xf1, 0x00, 0x27, 0x45, 0xbb, 0x31,
+ 0xaf, 0xff, 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x02, 0x03, 0x01, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x02, 0x10, 0x11, 0x41, 0x12, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01,
+ 0x01, 0x3f, 0x01, 0xf6, 0x4b, 0x5f, 0x48, 0xb3, 0x69, 0x63, 0x35, 0x72,
+ 0xbf, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11,
+ 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00,
+ 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2,
+ 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c,
+ 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61,
+ 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21,
+ 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01,
+ 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48,
+ 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01,
+ 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x26, 0x61, 0xd4, 0xff,
+ 0xc4, 0x00, 0x1a, 0x11, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
+ 0x31, 0x41, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
+ 0x10, 0x54, 0xa8, 0xbf, 0x50, 0x87, 0xb0, 0x9d, 0x8b, 0xc4, 0x6a, 0x26,
+ 0x6b, 0x2a, 0x9c, 0x1f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x11, 0x21, 0x51, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02,
+ 0x01, 0x01, 0x3f, 0x10, 0x70, 0xe1, 0x3e, 0xd1, 0x8e, 0x0d, 0xe1, 0xb5,
+ 0xd5, 0x91, 0x76, 0x43, 0x82, 0x45, 0x4c, 0x7b, 0x7f, 0xff, 0xc4, 0x00,
+ 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61,
+ 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01,
+ 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a,
+ 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96,
+ 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad,
+ 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7,
+ 0xd4, 0xff, 0xd9};
+static const size_t kTest1JpgLen = 735;
+
+// test 2 is J420
+static const uint8_t kTest2Jpg[] = {
+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+ 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+ 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x01, 0x02, 0x04, 0xff,
+ 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x01, 0x02, 0xff,
+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+ 0x01, 0x20, 0xe7, 0x28, 0xa3, 0x0b, 0x2e, 0x2d, 0xcf, 0xff, 0xc4, 0x00,
+ 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03, 0x10,
+ 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05, 0x02,
+ 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26, 0x62,
+ 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f,
+ 0x01, 0xc8, 0x53, 0xff, 0xc4, 0x00, 0x16, 0x11, 0x01, 0x01, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x32, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f,
+ 0x01, 0xd2, 0xc7, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff,
+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28,
+ 0x32, 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4,
+ 0x00, 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51,
+ 0x31, 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+ 0x3f, 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb,
+ 0xa9, 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9,
+ 0xc6, 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c,
+ 0x03, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x13, 0x5f,
+ 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11,
+ 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x0e,
+ 0xa1, 0x3a, 0x76, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x21, 0x11, 0xff, 0xda, 0x00, 0x08, 0x01, 0x02, 0x01, 0x01,
+ 0x3f, 0x10, 0x57, 0x0b, 0x08, 0x70, 0xdb, 0xff, 0xc4, 0x00, 0x1f, 0x10,
+ 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91,
+ 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01,
+ 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b,
+ 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec,
+ 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c,
+ 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff,
+ 0xd9};
+static const size_t kTest2JpgLen = 685;
+
+// test 3 is J422
+static const uint8_t kTest3Jpg[] = {
+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+ 0x01, 0x21, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+ 0x17, 0x00, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x01, 0x02, 0xff, 0xc4,
+ 0x00, 0x17, 0x01, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x00, 0xff,
+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+ 0x01, 0x43, 0x8d, 0x1f, 0xa2, 0xb3, 0xca, 0x1b, 0x57, 0x0f, 0xff, 0xc4,
+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+ 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x02, 0x10, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03,
+ 0x01, 0x01, 0x3f, 0x01, 0x51, 0xce, 0x8c, 0x75, 0xff, 0xc4, 0x00, 0x18,
+ 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x61, 0x21, 0xff, 0xda,
+ 0x00, 0x08, 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xa6, 0xd9, 0x2f, 0x84,
+ 0xe8, 0xf0, 0xff, 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x11, 0x21, 0x02, 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda,
+ 0x00, 0x08, 0x01, 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32,
+ 0xd2, 0xed, 0xf9, 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00,
+ 0x1c, 0x10, 0x01, 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31,
+ 0x61, 0x81, 0xf0, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f,
+ 0x21, 0x75, 0x6e, 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9,
+ 0x01, 0xf3, 0xde, 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6,
+ 0x48, 0x5d, 0x7a, 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03,
+ 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x2e, 0x45, 0xff,
+ 0xc4, 0x00, 0x18, 0x11, 0x00, 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x21,
+ 0x31, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x53,
+ 0x50, 0xba, 0x54, 0xc1, 0x67, 0x4f, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x11, 0x21, 0x00, 0x10, 0xff, 0xda, 0x00, 0x08,
+ 0x01, 0x02, 0x01, 0x01, 0x3f, 0x10, 0x18, 0x81, 0x5c, 0x04, 0x1a, 0xca,
+ 0x91, 0xbf, 0xff, 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04,
+ 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x11, 0x31, 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff,
+ 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9,
+ 0x58, 0xbe, 0x1a, 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5,
+ 0xd5, 0xab, 0xcd, 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c,
+ 0x47, 0xa7, 0x30, 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00,
+ 0x23, 0x1d, 0x03, 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
+static const size_t kTest3JpgLen = 704;
+
+// test 4 is J422 vertical - not supported
+static const uint8_t kTest4Jpg[] = {
+ 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x48, 0x00, 0x48, 0x00, 0x00, 0xff, 0xdb, 0x00, 0x43,
+ 0x00, 0x10, 0x0b, 0x0c, 0x0e, 0x0c, 0x0a, 0x10, 0x0e, 0x0d, 0x0e, 0x12,
+ 0x11, 0x10, 0x13, 0x18, 0x28, 0x1a, 0x18, 0x16, 0x16, 0x18, 0x31, 0x23,
+ 0x25, 0x1d, 0x28, 0x3a, 0x33, 0x3d, 0x3c, 0x39, 0x33, 0x38, 0x37, 0x40,
+ 0x48, 0x5c, 0x4e, 0x40, 0x44, 0x57, 0x45, 0x37, 0x38, 0x50, 0x6d, 0x51,
+ 0x57, 0x5f, 0x62, 0x67, 0x68, 0x67, 0x3e, 0x4d, 0x71, 0x79, 0x70, 0x64,
+ 0x78, 0x5c, 0x65, 0x67, 0x63, 0xff, 0xdb, 0x00, 0x43, 0x01, 0x11, 0x12,
+ 0x12, 0x18, 0x15, 0x18, 0x2f, 0x1a, 0x1a, 0x2f, 0x63, 0x42, 0x38, 0x42,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0xff, 0xc2, 0x00, 0x11, 0x08, 0x00, 0x10, 0x00, 0x20, 0x03,
+ 0x01, 0x12, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xff, 0xc4, 0x00,
+ 0x18, 0x00, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x05, 0x01, 0x02, 0x03, 0xff,
+ 0xc4, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03, 0xff,
+ 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, 0x10, 0x03, 0x10, 0x00, 0x00,
+ 0x01, 0xd2, 0x98, 0xe9, 0x03, 0x0c, 0x00, 0x46, 0x21, 0xd9, 0xff, 0xc4,
+ 0x00, 0x1b, 0x10, 0x00, 0x03, 0x00, 0x02, 0x03, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x11, 0x00, 0x03,
+ 0x10, 0x12, 0x13, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x05,
+ 0x02, 0x3b, 0x80, 0x6f, 0x56, 0x76, 0x56, 0x23, 0x87, 0x99, 0x0d, 0x26,
+ 0x62, 0xf6, 0xbf, 0xff, 0xc4, 0x00, 0x17, 0x11, 0x01, 0x01, 0x01, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x11, 0x01, 0x21, 0xff, 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01,
+ 0x3f, 0x01, 0x98, 0xb1, 0xbd, 0x47, 0xff, 0xc4, 0x00, 0x18, 0x11, 0x00,
+ 0x03, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x12, 0x11, 0x21, 0xff, 0xda, 0x00, 0x08,
+ 0x01, 0x02, 0x01, 0x01, 0x3f, 0x01, 0xb6, 0x35, 0xa2, 0xe1, 0x47, 0xff,
+ 0xc4, 0x00, 0x1e, 0x10, 0x00, 0x02, 0x01, 0x03, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x21, 0x02,
+ 0x12, 0x32, 0x10, 0x31, 0x71, 0x81, 0xa1, 0xff, 0xda, 0x00, 0x08, 0x01,
+ 0x01, 0x00, 0x06, 0x3f, 0x02, 0x4b, 0xb3, 0x28, 0x32, 0xd2, 0xed, 0xf9,
+ 0x1d, 0x3e, 0x13, 0x51, 0x73, 0x83, 0xff, 0xc4, 0x00, 0x1c, 0x10, 0x01,
+ 0x01, 0x01, 0x00, 0x02, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x11, 0x00, 0x21, 0x51, 0x31, 0x61, 0x81, 0xf0,
+ 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x01, 0x3f, 0x21, 0x75, 0x6e,
+ 0x31, 0x94, 0x28, 0xf9, 0x30, 0xdc, 0x27, 0xdb, 0xa9, 0x01, 0xf3, 0xde,
+ 0x02, 0xa0, 0xed, 0x1e, 0x34, 0x68, 0x23, 0xf9, 0xc6, 0x48, 0x5d, 0x7a,
+ 0x35, 0x02, 0xf5, 0x6f, 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
+ 0x00, 0x03, 0x00, 0x00, 0x00, 0x10, 0x24, 0xaf, 0xff, 0xc4, 0x00, 0x19,
+ 0x11, 0x00, 0x03, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x11, 0x51, 0x21, 0x31, 0xff,
+ 0xda, 0x00, 0x08, 0x01, 0x03, 0x01, 0x01, 0x3f, 0x10, 0x59, 0x11, 0xca,
+ 0x42, 0x60, 0x9f, 0x69, 0xff, 0xc4, 0x00, 0x19, 0x11, 0x00, 0x02, 0x03,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x01, 0x11, 0x21, 0x31, 0x61, 0xff, 0xda, 0x00, 0x08, 0x01,
+ 0x02, 0x01, 0x01, 0x3f, 0x10, 0xb0, 0xd7, 0x27, 0x51, 0xb6, 0x41, 0xff,
+ 0xc4, 0x00, 0x1f, 0x10, 0x01, 0x00, 0x02, 0x01, 0x04, 0x03, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x11, 0x31,
+ 0x41, 0x61, 0x71, 0x91, 0x21, 0x81, 0xd1, 0xb1, 0xff, 0xda, 0x00, 0x08,
+ 0x01, 0x01, 0x00, 0x01, 0x3f, 0x10, 0x1b, 0x30, 0xe9, 0x58, 0xbe, 0x1a,
+ 0xfd, 0x8a, 0xeb, 0x8b, 0x34, 0x74, 0x80, 0x4b, 0xb5, 0xd5, 0xab, 0xcd,
+ 0x46, 0x96, 0x2e, 0xec, 0xbd, 0xaa, 0x78, 0x47, 0x5c, 0x47, 0xa7, 0x30,
+ 0x49, 0xad, 0x88, 0x7c, 0x40, 0x74, 0x30, 0xff, 0x00, 0x23, 0x1d, 0x03,
+ 0x0b, 0xb7, 0xd4, 0xff, 0xd9};
+static const size_t kTest4JpgLen = 701;
+
+TEST_F(LibYUVConvertTest, TestMJPGSize) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ printf("test jpeg size %d x %d\n", width, height);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToI420) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_u, half_width * half_height);
+ align_buffer_page_end(dst_v, half_width * half_height);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_u, half_width,
+ dst_v, half_width, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381);
+ uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_u_hash, 2501859930u);
+ EXPECT_EQ(dst_v_hash, 2126459123u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_u);
+ free_aligned_buffer_page_end(dst_v);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ // Convert to NV21
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_vu,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Convert to I420
+ align_buffer_page_end(dst2_y, width * height);
+ align_buffer_page_end(dst2_u, half_width * half_height);
+ align_buffer_page_end(dst2_v, half_width * half_height);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
+ dst2_v, half_width, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Convert I420 to NV21
+ align_buffer_page_end(dst3_y, width * height);
+ align_buffer_page_end(dst3_vu, half_width * half_height * 2);
+
+ I420ToNV21(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
+ width, dst3_vu, half_width * 2, width, height);
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_y[i], dst3_y[i]);
+ }
+ for (int i = 0; i < half_width * half_height * 2; ++i) {
+    EXPECT_EQ(dst_vu[i], dst3_vu[i]);
+ }
+
+ free_aligned_buffer_page_end(dst3_y);
+ free_aligned_buffer_page_end(dst3_vu);
+
+ free_aligned_buffer_page_end(dst2_y);
+ free_aligned_buffer_page_end(dst2_u);
+ free_aligned_buffer_page_end(dst2_v);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ // Convert to NV12
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Convert to I420
+ align_buffer_page_end(dst2_y, width * height);
+ align_buffer_page_end(dst2_u, half_width * half_height);
+ align_buffer_page_end(dst2_v, half_width * half_height);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToI420(kTest2Jpg, kTest2JpgLen, dst2_y, width, dst2_u, half_width,
+ dst2_v, half_width, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Convert I420 to NV12
+ align_buffer_page_end(dst3_y, width * height);
+ align_buffer_page_end(dst3_uv, half_width * half_height * 2);
+
+ I420ToNV12(dst2_y, width, dst2_u, half_width, dst2_v, half_width, dst3_y,
+ width, dst3_uv, half_width * 2, width, height);
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_y[i], dst3_y[i]);
+ }
+ for (int i = 0; i < half_width * half_height * 2; ++i) {
+    EXPECT_EQ(dst_uv[i], dst3_uv[i]);
+ }
+
+ free_aligned_buffer_page_end(dst3_y);
+ free_aligned_buffer_page_end(dst3_uv);
+
+ free_aligned_buffer_page_end(dst2_y);
+ free_aligned_buffer_page_end(dst2_u);
+ free_aligned_buffer_page_end(dst2_v);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV21(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_uv_hash, 1069662856u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest2Jpg, kTest2JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 1069662856u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
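+
+// SwapUVPlane above turns interleaved UVUV... into VUVU..., which is why one
+// set of NV21 (VU-ordered) hash constants can also verify NV12 (UV-ordered)
+// output. A scalar sketch of the swap (illustrative; the library routine is
+// SIMD-accelerated and takes per-plane strides):
+static inline void SwapUVScalar(const uint8_t* src_uv,
+                                uint8_t* dst_vu,
+                                int pairs) {
+  for (int i = 0; i < pairs; ++i) {
+    dst_vu[2 * i + 0] = src_uv[2 * i + 1];  // V from the second byte.
+    dst_vu[2 * i + 1] = src_uv[2 * i + 0];  // U from the first byte.
+  }
+}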
+
+// TODO(fbarchard): Improve test to compare against I422, not checksum
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV21(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_uv_hash, 493520167u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest3Jpg, kTest3JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 493520167u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV21(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 330644005u);
+ EXPECT_EQ(dst_uv_hash, 135214341u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest0Jpg, kTest0JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 330644005u);
+ EXPECT_EQ(dst_vu_hash, 135214341u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV21(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_uv_hash, 506143297u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int half_width = (width + 1) / 2;
+ int half_height = (height + 1) / 2;
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_y, width * height);
+ align_buffer_page_end(dst_uv, half_width * half_height * 2);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToNV12(kTest1Jpg, kTest1JpgLen, dst_y, width, dst_uv,
+ half_width * 2, width, height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value. Hashes are for VU so flip the plane.
+ uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381);
+ align_buffer_page_end(dst_vu, half_width * half_height * 2);
+ SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width,
+ half_height);
+ uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381);
+ EXPECT_EQ(dst_y_hash, 2682851208u);
+ EXPECT_EQ(dst_vu_hash, 506143297u);
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_uv);
+ free_aligned_buffer_page_end(dst_vu);
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
+ int width = 0;
+ int height = 0;
+ int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height);
+ EXPECT_EQ(0, ret);
+
+ int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
+ benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
+
+ align_buffer_page_end(dst_argb, width * height * 4);
+ for (int times = 0; times < benchmark_iterations; ++times) {
+ ret = MJPGToARGB(kTest3Jpg, kTest3JpgLen, dst_argb, width * 4, width,
+ height, width, height);
+ }
+  // Expect success.
+ EXPECT_EQ(0, ret);
+
+ // Test result matches known hash value.
+ uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381);
+#ifdef LIBYUV_UNLIMITED_DATA
+ EXPECT_EQ(dst_argb_hash, 3900633302u);
+#else
+ EXPECT_EQ(dst_argb_hash, 2355976473u);
+#endif
+
+ free_aligned_buffer_page_end(dst_argb);
+}
+
+static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) {
+ MJpegDecoder mjpeg_decoder;
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+
+ int width = mjpeg_decoder.GetWidth();
+ int height = mjpeg_decoder.GetHeight();
+
+ // YUV420
+ if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ printf("JPeg is J420, %dx%d %d bytes\n", width, height,
+ static_cast<int>(sample_size));
+ // YUV422
+ } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ printf("JPeg is J422, %dx%d %d bytes\n", width, height,
+ static_cast<int>(sample_size));
+ // YUV444
+ } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
+ mjpeg_decoder.GetNumComponents() == 3 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+ mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+ printf("JPeg is J444, %dx%d %d bytes\n", width, height,
+ static_cast<int>(sample_size));
+ // YUV400
+ } else if (mjpeg_decoder.GetColorSpace() ==
+ MJpegDecoder::kColorSpaceGrayscale &&
+ mjpeg_decoder.GetNumComponents() == 1 &&
+ mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+ mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+ printf("JPeg is J400, %dx%d %d bytes\n", width, height,
+ static_cast<int>(sample_size));
+ } else {
+ // Unknown colorspace.
+ printf("JPeg is Unknown colorspace.\n");
+ }
+ mjpeg_decoder.UnloadFrame();
+ return ret;
+}
+
+TEST_F(LibYUVConvertTest, TestMJPGInfo) {
+ EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen));
+ EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
+ EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
+ EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
+ EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
+ kTest4JpgLen)); // Valid but unsupported.
+}
+#endif // HAVE_JPEG
+
+TEST_F(LibYUVConvertTest, NV12Crop) {
+ const int SUBSAMP_X = 2;
+ const int SUBSAMP_Y = 2;
+ const int kWidth = benchmark_width_;
+ const int kHeight = benchmark_height_;
+ const int crop_y =
+ ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
+ const int kDestWidth = benchmark_width_;
+ const int kDestHeight = benchmark_height_ - crop_y * 2;
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
+ const int sample_size =
+ kWidth * kHeight + kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
+ align_buffer_page_end(src_y, sample_size);
+ uint8_t* src_uv = src_y + kWidth * kHeight;
+
+ align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
+ align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+ align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
+ align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+ for (int i = 0; i < kHeight * kWidth; ++i) {
+ src_y[i] = (fastrand() & 0xff);
+ }
+ for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideUV) * 2; ++i) {
+ src_uv[i] = (fastrand() & 0xff);
+ }
+ memset(dst_y, 1, kDestWidth * kDestHeight);
+ memset(dst_u, 2,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ memset(dst_v, 3,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ memset(dst_y_2, 1, kDestWidth * kDestHeight);
+ memset(dst_u_2, 2,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ memset(dst_v_2, 3,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+ ConvertToI420(src_y, sample_size, dst_y_2, kDestWidth, dst_u_2,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v_2,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
+ kDestWidth, kDestHeight, libyuv::kRotate0, libyuv::FOURCC_NV12);
+
+ NV12ToI420(src_y + crop_y * kWidth, kWidth,
+ src_uv + (crop_y / 2) * kStrideUV * 2, kStrideUV * 2, dst_y,
+ kDestWidth, dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X), kDestWidth, kDestHeight);
+
+ for (int i = 0; i < kDestHeight; ++i) {
+ for (int j = 0; j < kDestWidth; ++j) {
+ EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]);
+ }
+ }
+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+ EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
+ dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+ }
+ }
+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+ EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j],
+ dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+ }
+ }
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_u);
+ free_aligned_buffer_page_end(dst_v);
+ free_aligned_buffer_page_end(dst_y_2);
+ free_aligned_buffer_page_end(dst_u_2);
+ free_aligned_buffer_page_end(dst_v_2);
+ free_aligned_buffer_page_end(src_y);
+}
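+
+// A worked instance of the crop arithmetic above (illustrative numbers, not
+// part of the test): for benchmark_height_ = 480 the kept region is
+// 480 * 360 / 480 = 360 rows, so crop_y = ((480 - 360) / 2 + 1) & ~1 = 60.
+// The & ~1 keeps the offset even, which NV12 requires because each UV row
+// covers two Y rows; the UV source pointer then advances by crop_y / 2
+// interleaved UV rows, i.e. (crop_y / 2) * kStrideUV * 2 bytes, as in the
+// NV12ToI420 call above.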
+
+TEST_F(LibYUVConvertTest, I420CropOddY) {
+ const int SUBSAMP_X = 2;
+ const int SUBSAMP_Y = 2;
+ const int kWidth = benchmark_width_;
+ const int kHeight = benchmark_height_;
+ const int crop_y = benchmark_height_ > 1 ? 1 : 0;
+ const int kDestWidth = benchmark_width_;
+ const int kDestHeight = benchmark_height_ - crop_y * 2;
+ const int kStrideU = SUBSAMPLE(kWidth, SUBSAMP_X);
+ const int kStrideV = SUBSAMPLE(kWidth, SUBSAMP_X);
+ const int sample_size = kWidth * kHeight +
+ kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y) +
+ kStrideV * SUBSAMPLE(kHeight, SUBSAMP_Y);
+ align_buffer_page_end(src_y, sample_size);
+ uint8_t* src_u = src_y + kWidth * kHeight;
+ uint8_t* src_v = src_u + kStrideU * SUBSAMPLE(kHeight, SUBSAMP_Y);
+
+ align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
+ align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+ SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+ for (int i = 0; i < kHeight * kWidth; ++i) {
+ src_y[i] = (fastrand() & 0xff);
+ }
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideU; ++i) {
+ src_u[i] = (fastrand() & 0xff);
+ }
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * kStrideV; ++i) {
+ src_v[i] = (fastrand() & 0xff);
+ }
+ memset(dst_y, 1, kDestWidth * kDestHeight);
+ memset(dst_u, 2,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ memset(dst_v, 3,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ConvertToI420(src_y, sample_size, dst_y, kDestWidth, dst_u,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v,
+ SUBSAMPLE(kDestWidth, SUBSAMP_X), 0, crop_y, kWidth, kHeight,
+ kDestWidth, kDestHeight, libyuv::kRotate0,
+ libyuv::FOURCC_I420);
+ }
+
+ for (int i = 0; i < kDestHeight; ++i) {
+ for (int j = 0; j < kDestWidth; ++j) {
+ EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j],
+ dst_y[i * kDestWidth + j]);
+ }
+ }
+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+ EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j],
+ dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+ }
+ }
+ for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) {
+ for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) {
+ EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j],
+ dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]);
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y);
+ free_aligned_buffer_page_end(dst_u);
+ free_aligned_buffer_page_end(dst_v);
+ free_aligned_buffer_page_end(src_y);
+}
+
+#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
+ TEST_F(LibYUVConvertTest, NAME) { \
+ const int kWidth = benchmark_width_; \
+ const int kHeight = benchmark_height_; \
+ \
+ align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
+ align_buffer_page_end(orig_y, kWidth* kHeight); \
+ align_buffer_page_end(orig_u, \
+ SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
+ align_buffer_page_end(orig_v, \
+ SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
+ \
+ align_buffer_page_end(dst_y_orig, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_orig, \
+ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
+ \
+ align_buffer_page_end(dst_y, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv, \
+ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
+ \
+ MemRandomize(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
+ \
+    /* Convert packed 4:2:2 source to NV12 in 2 steps for reference */      \
+ libyuv::UYVYTOI420(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), orig_y, kWidth, \
+ orig_u, SUBSAMPLE(kWidth, 2), orig_v, \
+ SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
+ libyuv::I420ToNV12(orig_y, kWidth, orig_u, SUBSAMPLE(kWidth, 2), orig_v, \
+ SUBSAMPLE(kWidth, 2), dst_y_orig, kWidth, dst_uv_orig, \
+ 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
+ \
+ /* Convert to NV12 */ \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ libyuv::UYVYTONV12(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2), dst_y, kWidth, \
+ dst_uv, 2 * SUBSAMPLE(kWidth, 2), kWidth, kHeight); \
+ } \
+ \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ EXPECT_EQ(orig_y[i], dst_y[i]); \
+ } \
+ for (int i = 0; i < kWidth * kHeight; ++i) { \
+ EXPECT_EQ(dst_y_orig[i], dst_y[i]); \
+ } \
+ for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \
+ ++i) { \
+ EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \
+ } \
+ \
+ free_aligned_buffer_page_end(orig_uyvy); \
+ free_aligned_buffer_page_end(orig_y); \
+ free_aligned_buffer_page_end(orig_u); \
+ free_aligned_buffer_page_end(orig_v); \
+ free_aligned_buffer_page_end(dst_y_orig); \
+ free_aligned_buffer_page_end(dst_uv_orig); \
+ free_aligned_buffer_page_end(dst_y); \
+ free_aligned_buffer_page_end(dst_uv); \
+ }
+
+TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
+TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
+
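+// TESTPTOB validates a fused packed-4:2:2-to-NV12 conversion against a
+// two-step reference: first to I420 with the planar converter, then I420 to
+// NV12. Checking both the reference NV12 output and the intermediate Y plane
+// leaves the fused path no room to drift from the planar code.
+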
+TEST_F(LibYUVConvertTest, MM21ToYUY2) {
+ const int kWidth = (benchmark_width_ + 15) & (~15);
+ const int kHeight = (benchmark_height_ + 31) & (~31);
+
+ align_buffer_page_end(orig_y, kWidth * kHeight);
+ align_buffer_page_end(orig_uv,
+ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(tmp_y, kWidth * kHeight);
+ align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+ align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+ align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+
+ MemRandomize(orig_y, kWidth * kHeight);
+ MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ /* Convert MM21 to YUY2 in 2 steps for reference */
+ libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
+ kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), golden_yuyv,
+ 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+
+  /* Convert MM21 to YUY2 directly */
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
+ dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ }
+
+ for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
+ EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tmp_y);
+ free_aligned_buffer_page_end(tmp_u);
+ free_aligned_buffer_page_end(tmp_v);
+ free_aligned_buffer_page_end(dst_yuyv);
+ free_aligned_buffer_page_end(golden_yuyv);
+}
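+
+// MM21 is a tiled NV12-like layout; the width and height round-ups to
+// multiples of 16 and 32 above reflect its tile geometry (16-pixel-wide
+// tiles, with Y tiles 32 rows tall as we understand the format), so whole
+// tiles always back the randomized source planes.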
+
+// Test RGB24 to J420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
+ align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
+ int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+ (kSize * 2) * benchmark_iterations_;
+
+ for (int i = 0; i < kSize * 3 * 2; ++i) {
+ orig_rgb24[i] = i;
+ }
+
+ for (int i = 0; i < iterations256; ++i) {
+ RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane
+ dest_j420 + kSize * 2, kSize / 2, // U plane
+ dest_j420 + kSize * 5 / 2, kSize / 2, // V plane
+ kSize, 2);
+ }
+
+ uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
+ EXPECT_EQ(2755440272u, checksum);
+
+ free_aligned_buffer_page_end(orig_rgb24);
+ free_aligned_buffer_page_end(dest_j420);
+}
+#endif
+
+// Test RGB24 to I420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
+ const int kSize = 256;
+ align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
+ align_buffer_page_end(dest_i420, kSize * 3 / 2 * 2);
+ int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+ (kSize * 2) * benchmark_iterations_;
+
+ for (int i = 0; i < kSize * 3 * 2; ++i) {
+ orig_rgb24[i] = i;
+ }
+
+ for (int i = 0; i < iterations256; ++i) {
+ RGB24ToI420(orig_rgb24, kSize * 3, dest_i420, kSize, // Y plane
+ dest_i420 + kSize * 2, kSize / 2, // U plane
+ dest_i420 + kSize * 5 / 2, kSize / 2, // V plane
+ kSize, 2);
+ }
+
+ uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
+ EXPECT_EQ(1526656597u, checksum);
+
+ free_aligned_buffer_page_end(orig_rgb24);
+ free_aligned_buffer_page_end(dest_i420);
+}
+#endif
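+
+// The golden checksums above come from HashDjb2, the classic djb2 hash:
+// starting from the seed 5381, each byte folds in as hash = hash * 33 + b.
+// A minimal reference sketch of that recurrence (not the library code):
+//
+//   uint32_t HashDjb2Ref(const uint8_t* p, size_t n, uint32_t seed) {
+//     uint32_t hash = seed;
+//     for (size_t i = 0; i < n; ++i) {
+//       hash = hash * 33u + p[i];
+//     }
+//     return hash;
+//   }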
+
+#endif // !defined(LEAN_TESTS)
+
+} // namespace libyuv
diff --git a/files/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 080778f5..437b6632 100644
--- a/files/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -20,13 +20,23 @@ namespace libyuv {
TEST_F(LibYUVBaseTest, TestCpuHas) {
int cpu_flags = TestCpuFlag(-1);
- printf("Cpu Flags %d\n", cpu_flags);
+ printf("Cpu Flags 0x%x\n", cpu_flags);
#if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM);
- printf("Has ARM %d\n", has_arm);
+ printf("Has ARM 0x%x\n", has_arm);
int has_neon = TestCpuFlag(kCpuHasNEON);
- printf("Has NEON %d\n", has_neon);
+ printf("Has NEON 0x%x\n", has_neon);
#endif
+#if defined(__riscv) && defined(__linux__)
+ int has_riscv = TestCpuFlag(kCpuHasRISCV);
+ printf("Has RISCV 0x%x\n", has_riscv);
+ int has_rvv = TestCpuFlag(kCpuHasRVV);
+ printf("Has RVV 0x%x\n", has_rvv);
+ int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH);
+ printf("Has RVVZVFH 0x%x\n", has_rvvzvfh);
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86);
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@@ -37,47 +47,48 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_erms = TestCpuFlag(kCpuHasERMS);
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
int has_f16c = TestCpuFlag(kCpuHasF16C);
- int has_gfni = TestCpuFlag(kCpuHasGFNI);
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
- int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
- printf("Has X86 %d\n", has_x86);
- printf("Has SSE2 %d\n", has_sse2);
- printf("Has SSSE3 %d\n", has_ssse3);
- printf("Has SSE41 %d\n", has_sse41);
- printf("Has SSE42 %d\n", has_sse42);
- printf("Has AVX %d\n", has_avx);
- printf("Has AVX2 %d\n", has_avx2);
- printf("Has ERMS %d\n", has_erms);
- printf("Has FMA3 %d\n", has_fma3);
- printf("Has F16C %d\n", has_f16c);
- printf("Has GFNI %d\n", has_gfni);
- printf("Has AVX512BW %d\n", has_avx512bw);
- printf("Has AVX512VL %d\n", has_avx512vl);
- printf("Has AVX512VNNI %d\n", has_avx512vnni);
- printf("Has AVX512VBMI %d\n", has_avx512vbmi);
- printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2);
- printf("Has AVX512VBITALG %d\n", has_avx512vbitalg);
- printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq);
-
+ int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+ int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+ int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
+ printf("Has X86 0x%x\n", has_x86);
+ printf("Has SSE2 0x%x\n", has_sse2);
+ printf("Has SSSE3 0x%x\n", has_ssse3);
+ printf("Has SSE41 0x%x\n", has_sse41);
+ printf("Has SSE42 0x%x\n", has_sse42);
+ printf("Has AVX 0x%x\n", has_avx);
+ printf("Has AVX2 0x%x\n", has_avx2);
+ printf("Has ERMS 0x%x\n", has_erms);
+ printf("Has FMA3 0x%x\n", has_fma3);
+ printf("Has F16C 0x%x\n", has_f16c);
+ printf("Has AVX512BW 0x%x\n", has_avx512bw);
+ printf("Has AVX512VL 0x%x\n", has_avx512vl);
+ printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
+ printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
+ printf("Has AVX10 0x%x\n", has_avx10);
+ printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+ printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
+#endif
#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
- printf("Has MIPS %d\n", has_mips);
+ printf("Has MIPS 0x%x\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
- printf("Has MSA %d\n", has_msa);
+ printf("Has MSA 0x%x\n", has_msa);
#endif
-
#if defined(__loongarch__)
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
- printf("Has LOONGARCH %d\n", has_loongarch);
+ printf("Has LOONGARCH 0x%x\n", has_loongarch);
int has_lsx = TestCpuFlag(kCpuHasLSX);
- printf("Has LSX %d\n", has_lsx);
+ printf("Has LSX 0x%x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
- printf("Has LASX %d\n", has_lasx);
+ printf("Has LASX 0x%x\n", has_lasx);
#endif
}
@@ -104,27 +115,36 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __i386__
printf("__i386__ %d\n", __i386__);
#endif
-#ifdef __mips
- printf("__mips %d\n", __mips);
-#endif
-#ifdef __mips_isa_rev
- printf("__mips_isa_rev %d\n", __mips_isa_rev);
-#endif
#ifdef __x86_64__
printf("__x86_64__ %d\n", __x86_64__);
#endif
+#ifdef _M_IX86
+ printf("_M_IX86 %d\n", _M_IX86);
+#endif
+#ifdef _M_X64
+ printf("_M_X64 %d\n", _M_X64);
+#endif
#ifdef _MSC_VER
printf("_MSC_VER %d\n", _MSC_VER);
#endif
#ifdef __aarch64__
printf("__aarch64__ %d\n", __aarch64__);
#endif
-#ifdef __APPLE__
- printf("__APPLE__ %d\n", __APPLE__);
-#endif
#ifdef __arm__
printf("__arm__ %d\n", __arm__);
#endif
+#ifdef __riscv
+ printf("__riscv %d\n", __riscv);
+#endif
+#ifdef __riscv_vector
+ printf("__riscv_vector %d\n", __riscv_vector);
+#endif
+#ifdef __riscv_v_intrinsic
+ printf("__riscv_v_intrinsic %d\n", __riscv_v_intrinsic);
+#endif
+#ifdef __APPLE__
+ printf("__APPLE__ %d\n", __APPLE__);
+#endif
#ifdef __clang__
printf("__clang__ %d\n", __clang__);
#endif
@@ -140,20 +160,11 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __mips_msa
printf("__mips_msa %d\n", __mips_msa);
#endif
-#ifdef __native_client__
- printf("__native_client__ %d\n", __native_client__);
-#endif
-#ifdef __pic__
- printf("__pic__ %d\n", __pic__);
-#endif
-#ifdef __pnacl__
- printf("__pnacl__ %d\n", __pnacl__);
-#endif
-#ifdef _M_IX86
- printf("_M_IX86 %d\n", _M_IX86);
+#ifdef __mips
+ printf("__mips %d\n", __mips);
#endif
-#ifdef _M_X64
- printf("_M_X64 %d\n", _M_X64);
+#ifdef __mips_isa_rev
+ printf("__mips_isa_rev %d\n", __mips_isa_rev);
#endif
#ifdef _MIPS_ARCH_LOONGSON3A
printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A);
@@ -164,8 +175,17 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef _WIN32
printf("_WIN32 %d\n", _WIN32);
#endif
+#ifdef __native_client__
+ printf("__native_client__ %d\n", __native_client__);
+#endif
+#ifdef __pic__
+ printf("__pic__ %d\n", __pic__);
+#endif
+#ifdef __pnacl__
+ printf("__pnacl__ %d\n", __pnacl__);
+#endif
#ifdef GG_LONGLONG
- printf("GG_LONGLONG %d\n", GG_LONGLONG);
+ printf("GG_LONGLONG %lld\n", GG_LONGLONG(1));
#endif
#ifdef INT_TYPES_DEFINED
printf("INT_TYPES_DEFINED\n");
@@ -200,8 +220,9 @@ TEST_F(LibYUVBaseTest, TestCpuId) {
cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3];
cpu_info[3] = 0;
- printf("Cpu Vendor: %s %x %x %x\n", reinterpret_cast<char*>(&cpu_info[0]),
- cpu_info[0], cpu_info[1], cpu_info[2]);
+ printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
+ reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
+ cpu_info[2]);
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
// CPU Family and Model
@@ -264,6 +285,32 @@ TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
}
}
+TEST_F(LibYUVBaseTest, TestLinuxRVV) {
+ if (FileExists("../../unit_test/testdata/riscv64.txt")) {
+ printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n");
+
+ EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt"));
+ EXPECT_EQ(kCpuHasRVV,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt"));
+ EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt"));
+ } else {
+ printf(
+ "WARNING: unable to load "
+ "\"../../unit_test/testdata/riscv64.txt\"\n");
+ }
+#if defined(__linux__) && defined(__riscv)
+ if (FileExists("/proc/cpuinfo")) {
+ if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) {
+ // This can happen on RVV emulator but /proc/cpuinfo is from host.
+ printf("WARNING: RVV build enabled but CPU does not have RVV\n");
+ }
+ } else {
+ printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
+ }
+#endif
+}
+
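+// Note on the %d -> 0x%x change above: each kCpuHas* constant is a distinct
+// bit, and TestCpuFlag returns the masked bit rather than a 0/1 boolean, so
+// the hexadecimal form shows which capability bit actually fired.
+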
// TODO(fbarchard): Fix clangcl test of cpuflags.
#ifdef _MSC_VER
TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) {
diff --git a/files/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc
index 69aab74e..69aab74e 100644
--- a/files/unit_test/cpu_thread_test.cc
+++ b/unit_test/cpu_thread_test.cc
diff --git a/files/unit_test/math_test.cc b/unit_test/math_test.cc
index a1544c12..a1544c12 100644
--- a/files/unit_test/math_test.cc
+++ b/unit_test/math_test.cc
diff --git a/files/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 3a8c470b..ec1d72eb 100644
--- a/files/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -30,9 +30,9 @@
#endif
#if defined(LIBYUV_BIT_EXACT)
-#define EXPECTED_ATTENUATE_DIFF 0
+#define EXPECTED_UNATTENUATE_DIFF 0
#else
-#define EXPECTED_ATTENUATE_DIFF 2
+#define EXPECTED_UNATTENUATE_DIFF 2
#endif
namespace libyuv {
@@ -57,12 +57,17 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
orig_pixels[2 * 4 + 0] = 16u;
orig_pixels[2 * 4 + 1] = 64u;
orig_pixels[2 * 4 + 2] = 192u;
- orig_pixels[2 * 4 + 3] = 255u;
+ orig_pixels[2 * 4 + 3] = 128u;
orig_pixels[3 * 4 + 0] = 16u;
orig_pixels[3 * 4 + 1] = 64u;
orig_pixels[3 * 4 + 2] = 192u;
- orig_pixels[3 * 4 + 3] = 128u;
- ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
+ orig_pixels[3 * 4 + 3] = 255u;
+ orig_pixels[4 * 4 + 0] = 255u;
+ orig_pixels[4 * 4 + 1] = 255u;
+ orig_pixels[4 * 4 + 2] = 255u;
+ orig_pixels[4 * 4 + 3] = 255u;
+
+ ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
@@ -71,14 +76,55 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
- EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
- EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
- EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
- EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
- EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
- EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
- EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
- EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
+ EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]);
+ EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]);
+ EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]);
+ EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]);
+ EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]);
+ EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]);
+ EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]);
+ EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]);
+ EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]);
+ EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]);
+ EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]);
+ EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]);
+
+ ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
+ EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]);
+ EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]);
+ EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]);
+ EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]);
+ EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]);
+ EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]);
+ EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]);
+ EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]);
+ EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]);
+ EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]);
+ EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]);
+ EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]);
+ EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]);
+ EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]);
+ EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]);
+ EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]);
+ EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]);
+ EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]);
+ EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]);
+ EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]);
+
+  // Test alpha 255: attenuation should pass color channels through unchanged.
+ for (int i = 0; i < 256; ++i) {
+ orig_pixels[i * 4 + 0] = i;
+ orig_pixels[i * 4 + 1] = 0;
+ orig_pixels[i * 4 + 2] = 0;
+ orig_pixels[i * 4 + 3] = 255;
+ }
+ ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]);
+ EXPECT_EQ(0, atten_pixels[i * 4 + 1]);
+ EXPECT_EQ(0, atten_pixels[i * 4 + 2]);
+ EXPECT_EQ(255, atten_pixels[i * 4 + 3]);
+ }
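+
+  // The expectations above follow from the attenuate math: attenuation is
+  // roughly color * alpha / 255, so the (16, 64, 192, alpha 128) pixel
+  // becomes (8, 32, 96, 128); unattenuation is roughly color * 255 / alpha
+  // clamped to 255, so the same source pixel becomes (32, 128, 255, 128).
+  // Exact rounding varies slightly across SIMD paths, which is what
+  // EXPECTED_UNATTENUATE_DIFF absorbs in the _Any/_Opt tests below.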
for (int i = 0; i < 1280; ++i) {
orig_pixels[i * 4 + 0] = i;
@@ -92,10 +138,10 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
}
for (int i = 0; i < 1280; ++i) {
- EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
- EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
- EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
- EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
+ EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1);
+ EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1);
+ EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1);
+ EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1);
}
// Make sure transparent, 50% and opaque are fully accurate.
EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
@@ -106,9 +152,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
- EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
- EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
- EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_EQ(255, atten_pixels[255 * 4 + 0]);
+ EXPECT_EQ(127, atten_pixels[255 * 4 + 1]);
+ EXPECT_EQ(85, atten_pixels[255 * 4 + 2]);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@@ -165,28 +211,28 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_EQ(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_EQ(max_diff, 0);
}
static int TestUnattenuateI(int width,
@@ -238,28 +284,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@@ -1638,29 +1684,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int y_plane_size = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
- MemRandomize(orig_y, orig_plane_size);
+ MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
@@ -1668,7 +1714,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
- free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(tile_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
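+
+// DetilePlane linearizes a tiled source: with the tile height of 16 used
+// here, pixels are stored in 16-row-tall tiles rather than raster order,
+// which is why the source dimensions are rounded up to multiples of 16 so
+// that whole tiles back the randomized input. The _16 variant below does the
+// same for 16-bit samples.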
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height * 2;
+ int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
+ align_buffer_page_end(tile_y, tile_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(tile_y, tile_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(tile_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
@@ -1678,33 +1763,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
- align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
+ align_buffer_page_end(detiled_uv, tile_plane_size);
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
- memset(detiled_uv, 0, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
+ memset(detiled_uv, 0, tile_plane_size);
memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
- SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
@@ -1715,7 +1800,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(detiled_uv);
free_aligned_buffer_page_end(dst_u_two_stage);
free_aligned_buffer_page_end(dst_u_opt);
@@ -1727,17 +1812,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
@@ -1746,7 +1831,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
@@ -1755,7 +1840,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
- orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
@@ -1764,7 +1849,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
@@ -2710,12 +2795,23 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
MaskCpuFlags(disable_cpu_flags_);
ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
benchmark_width_, benchmark_width_, benchmark_height_);
- MaskCpuFlags(benchmark_cpu_info_);
+ double c_time = get_time();
+ ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ c_time = (get_time() - c_time);
+ MaskCpuFlags(benchmark_cpu_info_);
+ ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_);
+ double opt_time = get_time();
for (int i = 0; i < benchmark_iterations_; ++i) {
ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
benchmark_width_, benchmark_width_, benchmark_height_);
}
+ opt_time = (get_time() - opt_time) / benchmark_iterations_;
+ // Report performance of C vs OPT
+ printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
+ static_cast<int>(opt_time * 1e6));
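+  // Note: c_time above covers a single pass while opt_time is averaged over
+  // benchmark_iterations_, so treat the printed comparison as a rough
+  // indicator rather than a precise benchmark.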
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -2738,12 +2834,24 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
MaskCpuFlags(disable_cpu_flags_);
ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
benchmark_width_ * 4, benchmark_width_, benchmark_height_);
- MaskCpuFlags(benchmark_cpu_info_);
+ double c_time = get_time();
+ ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ c_time = (get_time() - c_time);
+ MaskCpuFlags(benchmark_cpu_info_);
+ ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
+ benchmark_width_ * 4, benchmark_width_, benchmark_height_);
+ double opt_time = get_time();
for (int i = 0; i < benchmark_iterations_; ++i) {
ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
benchmark_width_ * 4, benchmark_width_, benchmark_height_);
}
+ opt_time = (get_time() - opt_time) / benchmark_iterations_;
+
+ // Report performance of C vs OPT
+ printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
+ static_cast<int>(opt_time * 1e6));
for (int i = 0; i < kPixels * 4; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -3495,8 +3603,8 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
- // Round count up to multiple of 16
- const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+ // Round count up to multiple of 8
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
@@ -4429,4 +4537,83 @@ TEST_F(LibYUVPlanarTest, NV21Copy) {
free_aligned_buffer_page_end(dst_vu);
}
+#if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
+ defined(__aarch64__)
+
+TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
+ int i, j;
+ const int y_plane_size = benchmark_width_ * benchmark_height_;
+
+ align_buffer_page_end(orig_f, y_plane_size * 4);
+ align_buffer_page_end(orig_y, y_plane_size * 2);
+ align_buffer_page_end(dst_opt, y_plane_size * 4);
+ align_buffer_page_end(rec_opt, y_plane_size * 2);
+
+ for (i = 0; i < y_plane_size; ++i) {
+ ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
+ }
+ memset(orig_y, 1, y_plane_size * 2);
+ memset(dst_opt, 2, y_plane_size * 4);
+ memset(rec_opt, 3, y_plane_size * 2);
+
+ ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
+ y_plane_size);
+
+ for (j = 0; j < benchmark_iterations_; j++) {
+ ConvertFP16ToFP32Row_NEON((const uint16_t*)orig_y, (float*)dst_opt,
+ y_plane_size);
+ }
+
+ ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
+ y_plane_size);
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_f);
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_opt);
+ free_aligned_buffer_page_end(rec_opt);
+}
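+
+// The round trip above relies on FP16 -> FP32 widening being exact: every
+// finite half-precision value is representable as a float, so converting
+// orig_y up and then back down must reproduce the original 16-bit patterns,
+// letting the test compare bit patterns instead of floats with a tolerance.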
+
+TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
+ int i, j;
+ const int y_plane_size = benchmark_width_ * benchmark_height_;
+
+ align_buffer_page_end(orig_f, y_plane_size * 4);
+ align_buffer_page_end(orig_y, y_plane_size * 2);
+ align_buffer_page_end(dst_opt, y_plane_size * 4);
+ align_buffer_page_end(rec_opt, y_plane_size * 2);
+
+ for (i = 0; i < y_plane_size; ++i) {
+ ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
+ }
+ memset(orig_y, 1, y_plane_size * 2);
+ memset(dst_opt, 2, y_plane_size * 4);
+ memset(rec_opt, 3, y_plane_size * 2);
+
+ ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
+ y_plane_size);
+
+ for (j = 0; j < benchmark_iterations_; j++) {
+ ConvertFP16ToFP32Column_NEON((const uint16_t*)orig_y, 1, (float*)dst_opt,
+ y_plane_size);
+ }
+
+ ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
+ y_plane_size);
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_f);
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(dst_opt);
+ free_aligned_buffer_page_end(rec_opt);
+}
+
+#endif  // ENABLE_ROW_TESTS && !LIBYUV_DISABLE_NEON && __aarch64__
+
} // namespace libyuv
diff --git a/files/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc
index 01ed69ca..74952c4e 100644
--- a/files/unit_test/rotate_argb_test.cc
+++ b/unit_test/rotate_argb_test.cc
@@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
free_aligned_buffer_page_end(src_argb);
}
+static void TestRotatePlane_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height < 1) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_stride = src_width;
+ int src_plane_size = src_stride * abs(src_height);
+ align_buffer_page_end_16(src, src_plane_size);
+ for (int i = 0; i < src_plane_size; ++i) {
+ src[i] = fastrand() & 0xff;
+ }
+
+ int dst_stride = dst_width;
+ int dst_plane_size = dst_stride * dst_height;
+ align_buffer_page_end_16(dst_c, dst_plane_size);
+ align_buffer_page_end_16(dst_opt, dst_plane_size);
+ memset(dst_c, 2, dst_plane_size);
+ memset(dst_opt, 3, dst_plane_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
+ mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
+ mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_c);
+ free_aligned_buffer_page_end_16(dst_opt);
+ free_aligned_buffer_page_end_16(src);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
} // namespace libyuv
diff --git a/files/unit_test/rotate_test.cc b/unit_test/rotate_test.cc
index d3887414..abc08efa 100644
--- a/files/unit_test/rotate_test.cc
+++ b/unit_test/rotate_test.cc
@@ -14,6 +14,10 @@
#include "libyuv/cpu_id.h"
#include "libyuv/rotate.h"
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/rotate_row.h"
+#endif
+
namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
@@ -596,4 +600,363 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
+static void I010TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i010_y_size = src_width * Abs(src_height);
+ int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
+ int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
+ align_buffer_page_end_16(src_i010, src_i010_size);
+ for (int i = 0; i < src_i010_size; ++i) {
+ src_i010[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i010_y_size = dst_width * dst_height;
+ int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
+ int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
+ align_buffer_page_end_16(dst_i010_c, dst_i010_size);
+ align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
+ memset(dst_i010_c, 2, dst_i010_size * 2);
+ memset(dst_i010_opt, 3, dst_i010_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
+ (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
+ (src_width + 1) / 2, dst_i010_c, dst_width,
+ dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
+ dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I010Rotate(
+ src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
+ src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
+ dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
+ (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i010_size; ++i) {
+ EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i010_c);
+ free_aligned_buffer_page_end_16(dst_i010_opt);
+ free_aligned_buffer_page_end_16(src_i010);
+}
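+
+// I010 carries 10-bit samples in 16-bit lanes, hence fastrand() & 0x3ff for
+// the source data, and the memset sizes are doubled because the _16 buffers
+// are counted in uint16_t elements rather than bytes.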
+
+TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I210TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i210_y_size = src_width * Abs(src_height);
+ int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
+ int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
+ align_buffer_page_end_16(src_i210, src_i210_size);
+ for (int i = 0; i < src_i210_size; ++i) {
+ src_i210[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i210_y_size = dst_width * dst_height;
+ int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
+ int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
+ align_buffer_page_end_16(dst_i210_c, dst_i210_size);
+ align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
+ memset(dst_i210_c, 2, dst_i210_size * 2);
+ memset(dst_i210_opt, 3, dst_i210_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
+ (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
+ (src_width + 1) / 2, dst_i210_c, dst_width,
+ dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
+ dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I210Rotate(
+ src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
+ src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
+ dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
+ (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i210_size; ++i) {
+ EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i210_c);
+ free_aligned_buffer_page_end_16(dst_i210_opt);
+ free_aligned_buffer_page_end_16(src_i210);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I410TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i410_y_size = src_width * Abs(src_height);
+ int src_i410_uv_size = src_width * Abs(src_height);
+ int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
+ align_buffer_page_end_16(src_i410, src_i410_size);
+ for (int i = 0; i < src_i410_size; ++i) {
+ src_i410[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i410_y_size = dst_width * dst_height;
+ int dst_i410_uv_size = dst_width * dst_height;
+ int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
+ align_buffer_page_end_16(dst_i410_c, dst_i410_size);
+ align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
+ memset(dst_i410_c, 2, dst_i410_size * 2);
+ memset(dst_i410_opt, 3, dst_i410_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
+ dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
+ src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
+ dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
+ dst_width, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i410_size; ++i) {
+ EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i410_c);
+ free_aligned_buffer_page_end_16(dst_i410_opt);
+ free_aligned_buffer_page_end_16(src_i410);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+#if defined(ENABLE_ROW_TESTS)
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
+ // dst width and height
+ const int width = 4;
+ const int height = 4;
+ int src_pixels[4][4];
+ int dst_pixels_c[4][4];
+ int dst_pixels_opt[4][4];
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ src_pixels[i][j] = i * 10 + j;
+ }
+ }
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ const int benchmark_iterations =
+ (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /
+ (4 * 4);
+ for (int i = 0; i < benchmark_iterations; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
+ EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
+ }
+ }
+}
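+
+// The #if defined(HAS_TRANSPOSE4X4_32_*) / TestCpuFlag chain above is the
+// usual two-level dispatch in these row tests: the compile-time macro says a
+// SIMD variant exists in this build, the runtime CPU flag decides whether to
+// call it, and the C row function remains the fallback on both levels.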
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
+ // dst width and height
+ const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
+ const int height = 4;
+ align_buffer_page_end(src_pixels, height * width * 4);
+ align_buffer_page_end(dst_pixels_c, width * height * 4);
+ align_buffer_page_end(dst_pixels_opt, width * height * 4);
+
+ MemRandomize(src_pixels, height * width * 4);
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_c);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+}
+
+#endif // ENABLE_ROW_TESTS
+
} // namespace libyuv
diff --git a/files/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc
index f54a68f1..f54a68f1 100644
--- a/files/unit_test/scale_argb_test.cc
+++ b/unit_test/scale_argb_test.cc
diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc
new file mode 100644
index 00000000..9ce47a02
--- /dev/null
+++ b/unit_test/scale_plane_test.cc
@@ -0,0 +1,470 @@
+/*
+ * Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "../unit_test/unit_test.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/scale.h"
+
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
+#endif
+
+#define STRINGIZE(line) #line
+#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#undef ENABLE_ROW_TESTS
+#define LEAN_TESTS
+#endif
+
+#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
+// SLOW TESTS are those that are unoptimized C code.
+// FULL TESTS are optimized but test many variations of the same code.
+#define ENABLE_FULL_TESTS
+#endif
+
+namespace libyuv {
+
+#ifdef ENABLE_ROW_TESTS
+#ifdef HAS_SCALEROWDOWN2_SSSE3
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
+ SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
+ SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
+ SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
+ memset(orig_pixels, 0, sizeof(orig_pixels));
+ memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
+ memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+
+ int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ if (!has_ssse3) {
+ printf("Warning SSSE3 not detected; Skipping test.\n");
+ } else {
+ // TL.
+ orig_pixels[0] = 255u;
+ orig_pixels[1] = 0u;
+ orig_pixels[128 + 0] = 0u;
+ orig_pixels[128 + 1] = 0u;
+ // TR.
+ orig_pixels[2] = 0u;
+ orig_pixels[3] = 100u;
+ orig_pixels[128 + 2] = 0u;
+ orig_pixels[128 + 3] = 0u;
+ // BL.
+ orig_pixels[4] = 0u;
+ orig_pixels[5] = 0u;
+ orig_pixels[128 + 4] = 50u;
+ orig_pixels[128 + 5] = 0u;
+ // BR.
+ orig_pixels[6] = 0u;
+ orig_pixels[7] = 0u;
+ orig_pixels[128 + 6] = 0u;
+ orig_pixels[128 + 7] = 20u;
+ // Odd.
+ orig_pixels[126] = 4u;
+ orig_pixels[127] = 255u;
+ orig_pixels[128 + 126] = 16u;
+ orig_pixels[128 + 127] = 255u;
+
+ // Test regular half size.
+ ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(133u, dst_pixels_c[63]);
+
+    // Test the odd-width version - the last pixel is just 1 horizontal pixel.
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(10u, dst_pixels_c[63]);
+
+ // Test one pixel less, should skip the last pixel.
+ memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
+
+ EXPECT_EQ(64u, dst_pixels_c[0]);
+ EXPECT_EQ(25u, dst_pixels_c[1]);
+ EXPECT_EQ(13u, dst_pixels_c[2]);
+ EXPECT_EQ(5u, dst_pixels_c[3]);
+ EXPECT_EQ(0u, dst_pixels_c[4]);
+ EXPECT_EQ(0u, dst_pixels_c[63]);
+
+ // Test regular half size SSSE3.
+ ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+
+ EXPECT_EQ(64u, dst_pixels_opt[0]);
+ EXPECT_EQ(25u, dst_pixels_opt[1]);
+ EXPECT_EQ(13u, dst_pixels_opt[2]);
+ EXPECT_EQ(5u, dst_pixels_opt[3]);
+ EXPECT_EQ(0u, dst_pixels_opt[4]);
+ EXPECT_EQ(133u, dst_pixels_opt[63]);
+
+    // Verify that the C and SSSE3 versions match.
+ ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
+ ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
+ for (int i = 0; i < 64; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ }
+}
+#endif // HAS_SCALEROWDOWN2_SSSE3
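The expected constants in this test follow from the 2x2 box filter with rounding, out = (tl + tr + bl + br + 2) / 4; the odd-width variant averages only the final column, out = (top + bottom + 1) / 2. Checking the inputs set up above (a worked example, not additional test behavior):

    (255 +   0 +  0 +  0 + 2) / 4 = 64    // TL block    -> dst[0]
    (  0 + 100 +  0 +  0 + 2) / 4 = 25    // TR block    -> dst[1]
    (  0 +   0 + 50 +  0 + 2) / 4 = 13    // BL block    -> dst[2]
    (  0 +   0 +  0 + 20 + 2) / 4 = 5     // BR block    -> dst[3] (truncating)
    (  4 + 255 + 16 + 255 + 2) / 4 = 133  // last pair   -> dst[63], regular
    (  4 +  16 + 1) / 2 = 10              // last column -> dst[63], odd variant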
+
+extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+
+TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
+ SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
+
+ memset(orig_pixels, 0, sizeof(orig_pixels));
+ memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
+ memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
+
+ for (int i = 0; i < 2560 * 2; ++i) {
+ orig_pixels[i] = i;
+ }
+ ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+ } else {
+ ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+ }
+#else
+ ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
+#endif
+ }
+
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
+ EXPECT_EQ(dst_pixels_c[1279], 3839);
+}
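The two spot checks follow the same rounding rule: with orig_pixels[i] = i and a source stride of 2560, dst[0] box-averages the block {0, 1, 2560, 2561} and dst[1279] averages {2558, 2559, 5118, 5119}:

    (0 + 1 + 2560 + 2561 + 2) / 4 = 1281
    (2558 + 2559 + 5118 + 5119 + 2) / 4 = 3839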
+#endif // ENABLE_ROW_TESTS
+
+// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
+// difference.
+// 0 = exact.
+static int TestPlaneFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int src_stride_y = Abs(src_width);
+ int dst_y_plane_size = dst_width * dst_height;
+ int dst_stride_y = dst_width;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+ uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+ uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+
+ MemRandomize(src_y, src_y_plane_size);
+ memset(dst_y_8, 0, dst_y_plane_size);
+ memset(dst_y_16, 1, dst_y_plane_size * 2);
+
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_16[i] = src_y[i] & 255;
+ }
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
+ dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+
+ for (i = 0; i < benchmark_iterations; ++i) {
+ ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
+ dst_stride_y, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_y_16);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_y_16);
+
+ return max_diff;
+}
+
+// The following adjustments to the dimensions ensure that the scale factor
+// is achieved exactly.
+// The factor of 2 accounts for chroma subsampling.
+#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
+#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
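As a worked example of how these macros round (taking an illustrative benchmark_width_ of 1280; the real value is a test parameter), the 3/4 factor gives:

    SX(1280, 3, 4) = ((1280 / 3 + 1) / 2) * 4 * 2 = 213 * 8 = 1704  // source width
    DX(1280, 3, 4) = ((1280 / 3 + 1) / 2) * 3 * 2 = 213 * 6 = 1278  // dest width

1278 / 1704 is exactly 3/4, and both dimensions are even, so the chroma planes scale by the same factor.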
+
+#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+ TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
+ int diff = TestPlaneFilter_16( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ }
+
+// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
+// the filtered paths use different fixed-point implementations for SSSE3,
+// NEON and C, so allow a small difference.
+#define TEST_FACTOR(name, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, None, nom, denom, 0) \
+ TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
+ TEST_FACTOR1(name, Box, nom, denom, boxdiff)
+
+TEST_FACTOR(2, 1, 2, 0)
+TEST_FACTOR(4, 1, 4, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disabled for benchmark performance; takes 90 seconds.
+TEST_FACTOR(3by4, 3, 4, 1)
+TEST_FACTOR(3by8, 3, 8, 1)
+TEST_FACTOR(3, 1, 3, 0)
+#undef TEST_FACTOR1
+#undef TEST_FACTOR
+#undef SX
+#undef DX
+
+TEST_F(LibYUVScaleTest, PlaneTest3x) {
+ const int kSrcStride = 480;
+ const int kDstStride = 160;
+ const int kSize = kSrcStride * 3;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 480 * 3; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(225, dest_pixels[0]);
+
+ ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(225, dest_pixels[0]);
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest4x) {
+ const int kSrcStride = 640;
+ const int kDstStride = 160;
+ const int kSize = kSrcStride * 4;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < 640 * 4; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_pixels, kDstStride);
+
+ int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
+ benchmark_iterations_;
+ for (int i = 0; i < iterations160; ++i) {
+ ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterBilinear);
+ }
+
+ EXPECT_EQ(66, dest_pixels[0]);
+
+ ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
+ kFilterNone);
+
+ EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
+
+ free_aligned_buffer_page_end(dest_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterNone);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterNone);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterBilinear);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterBilinear);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+// Intent is to test 200x50 to 50x200 but width and height can be parameters.
+TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
+ const int kSize = benchmark_width_ * benchmark_height_;
+ align_buffer_page_end(orig_pixels, kSize);
+ for (int i = 0; i < kSize; ++i) {
+ orig_pixels[i] = i;
+ }
+ align_buffer_page_end(dest_opt_pixels, kSize);
+ align_buffer_page_end(dest_c_pixels, kSize);
+
+ MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
+ dest_c_pixels, benchmark_height_, benchmark_height_,
+ benchmark_width_, kFilterBox);
+ MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
+ benchmark_height_, dest_opt_pixels, benchmark_height_,
+ benchmark_height_, benchmark_width_, kFilterBox);
+ }
+
+ for (int i = 0; i < kSize; ++i) {
+ EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
+ }
+
+ free_aligned_buffer_page_end(dest_c_pixels);
+ free_aligned_buffer_page_end(dest_opt_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
+ align_buffer_page_end(orig_pixels, 3);
+ align_buffer_page_end(dst_pixels, 3);
+
+ // Pad the 1x1 byte image with invalid values before and after in case libyuv
+ // reads outside the memory boundaries.
+ orig_pixels[0] = 0;
+ orig_pixels[1] = 1; // scale this pixel
+ orig_pixels[2] = 2;
+ dst_pixels[0] = 3;
+ dst_pixels[1] = 3;
+ dst_pixels[2] = 3;
+
+ libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+ /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+ /* dst_width= */ 1, /* dst_height= */ 2,
+ libyuv::kFilterBox);
+
+ EXPECT_EQ(dst_pixels[0], 1);
+ EXPECT_EQ(dst_pixels[1], 1);
+ EXPECT_EQ(dst_pixels[2], 3);
+
+ free_aligned_buffer_page_end(dst_pixels);
+ free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
+ align_buffer_page_end(orig_pixels_alloc, 3 * 2);
+ align_buffer_page_end(dst_pixels_alloc, 3 * 2);
+ uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
+ uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
+
+  // Pad the 1x1 16 bit image with invalid values before and after in case
+  // libyuv reads outside the memory boundaries.
+ orig_pixels[0] = 0;
+ orig_pixels[1] = 1; // scale this pixel
+ orig_pixels[2] = 2;
+ dst_pixels[0] = 3;
+ dst_pixels[1] = 3;
+ dst_pixels[2] = 3;
+
+ libyuv::ScalePlane_16(
+ orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
+ /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
+ /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
+
+ EXPECT_EQ(dst_pixels[0], 1);
+ EXPECT_EQ(dst_pixels[1], 1);
+ EXPECT_EQ(dst_pixels[2], 3);
+
+ free_aligned_buffer_page_end(dst_pixels_alloc);
+ free_aligned_buffer_page_end(orig_pixels_alloc);
+}
+} // namespace libyuv
diff --git a/files/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc
index 8296abe3..8296abe3 100644
--- a/files/unit_test/scale_rgb_test.cc
+++ b/unit_test/scale_rgb_test.cc
diff --git a/files/unit_test/scale_test.cc b/unit_test/scale_test.cc
index a8c95268..6e3b9271 100644
--- a/files/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -22,6 +22,11 @@
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
+#if defined(__riscv) && !defined(__clang__)
+#define DISABLE_SLOW_TESTS
+#undef ENABLE_FULL_TESTS
+#endif
+
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
// SLOW TESTS are those that are unoptimized C code.
// FULL TESTS are optimized but test many variations of the same code.
@@ -1123,479 +1128,6 @@ TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
#endif
#endif
-
#undef TEST_SCALESWAPXY1
-#ifdef ENABLE_ROW_TESTS
-#ifdef HAS_SCALEROWDOWN2_SSSE3
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
- SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
- SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
- SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
- memset(orig_pixels, 0, sizeof(orig_pixels));
- memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
- memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
-
- int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
- if (!has_ssse3) {
- printf("Warning SSSE3 not detected; Skipping test.\n");
- } else {
- // TL.
- orig_pixels[0] = 255u;
- orig_pixels[1] = 0u;
- orig_pixels[128 + 0] = 0u;
- orig_pixels[128 + 1] = 0u;
- // TR.
- orig_pixels[2] = 0u;
- orig_pixels[3] = 100u;
- orig_pixels[128 + 2] = 0u;
- orig_pixels[128 + 3] = 0u;
- // BL.
- orig_pixels[4] = 0u;
- orig_pixels[5] = 0u;
- orig_pixels[128 + 4] = 50u;
- orig_pixels[128 + 5] = 0u;
- // BR.
- orig_pixels[6] = 0u;
- orig_pixels[7] = 0u;
- orig_pixels[128 + 6] = 0u;
- orig_pixels[128 + 7] = 20u;
- // Odd.
- orig_pixels[126] = 4u;
- orig_pixels[127] = 255u;
- orig_pixels[128 + 126] = 16u;
- orig_pixels[128 + 127] = 255u;
-
- // Test regular half size.
- ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(133u, dst_pixels_c[63]);
-
- // Test Odd width version - Last pixel is just 1 horizontal pixel.
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(10u, dst_pixels_c[63]);
-
- // Test one pixel less, should skip the last pixel.
- memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
-
- EXPECT_EQ(64u, dst_pixels_c[0]);
- EXPECT_EQ(25u, dst_pixels_c[1]);
- EXPECT_EQ(13u, dst_pixels_c[2]);
- EXPECT_EQ(5u, dst_pixels_c[3]);
- EXPECT_EQ(0u, dst_pixels_c[4]);
- EXPECT_EQ(0u, dst_pixels_c[63]);
-
- // Test regular half size SSSE3.
- ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
-
- EXPECT_EQ(64u, dst_pixels_opt[0]);
- EXPECT_EQ(25u, dst_pixels_opt[1]);
- EXPECT_EQ(13u, dst_pixels_opt[2]);
- EXPECT_EQ(5u, dst_pixels_opt[3]);
- EXPECT_EQ(0u, dst_pixels_opt[4]);
- EXPECT_EQ(133u, dst_pixels_opt[63]);
-
- // Compare C and SSSE3 match.
- ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
- ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
- for (int i = 0; i < 64; ++i) {
- EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
- }
- }
-}
-#endif // HAS_SCALEROWDOWN2_SSSE3
-
-extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width);
-extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width);
-
-TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
- SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
- SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
- SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
-
- memset(orig_pixels, 0, sizeof(orig_pixels));
- memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
- memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
-
- for (int i = 0; i < 640 * 2 + 1; ++i) {
- orig_pixels[i] = i;
- }
- ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
- for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
- int has_neon = TestCpuFlag(kCpuHasNEON);
- if (has_neon) {
- ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
- } else {
- ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
- }
-#else
- ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
-#endif
- }
-
- for (int i = 0; i < 1280; ++i) {
- EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
- }
- EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
- EXPECT_EQ(dst_pixels_c[1279], 800);
-}
-
-extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width);
-
-TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
- SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
- SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
- SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
-
- memset(orig_pixels, 0, sizeof(orig_pixels));
- memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
- memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
-
- for (int i = 0; i < 2560 * 2; ++i) {
- orig_pixels[i] = i;
- }
- ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
- for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
- int has_neon = TestCpuFlag(kCpuHasNEON);
- if (has_neon) {
- ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
- } else {
- ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
- }
-#else
- ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
-#endif
- }
-
- for (int i = 0; i < 1280; ++i) {
- EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
- }
-
- EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
- EXPECT_EQ(dst_pixels_c[1279], 3839);
-}
-#endif // ENABLE_ROW_TESTS
-
-// Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
-// difference.
-// 0 = exact.
-static int TestPlaneFilter_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- FilterMode f,
- int benchmark_iterations,
- int disable_cpu_flags,
- int benchmark_cpu_info) {
- if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
- return 0;
- }
-
- int i;
- int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
- int src_stride_y = Abs(src_width);
- int dst_y_plane_size = dst_width * dst_height;
- int dst_stride_y = dst_width;
-
- align_buffer_page_end(src_y, src_y_plane_size);
- align_buffer_page_end(src_y_16, src_y_plane_size * 2);
- align_buffer_page_end(dst_y_8, dst_y_plane_size);
- align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
- uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
- uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
-
- MemRandomize(src_y, src_y_plane_size);
- memset(dst_y_8, 0, dst_y_plane_size);
- memset(dst_y_16, 1, dst_y_plane_size * 2);
-
- for (i = 0; i < src_y_plane_size; ++i) {
- p_src_y_16[i] = src_y[i] & 255;
- }
-
- MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
- ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
- dst_width, dst_height, f);
- MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
-
- for (i = 0; i < benchmark_iterations; ++i) {
- ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
- dst_stride_y, dst_width, dst_height, f);
- }
-
- // Expect an exact match.
- int max_diff = 0;
- for (i = 0; i < dst_y_plane_size; ++i) {
- int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
- }
-
- free_aligned_buffer_page_end(dst_y_8);
- free_aligned_buffer_page_end(dst_y_16);
- free_aligned_buffer_page_end(src_y);
- free_aligned_buffer_page_end(src_y_16);
-
- return max_diff;
-}
-
-// The following adjustments in dimensions ensure the scale factor will be
-// exactly achieved.
-// 2 is chroma subsample.
-#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
-#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
-
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
- TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
- int diff = TestPlaneFilter_16( \
- SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
- DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
- kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
- }
-
-// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom, boxdiff) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
- TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
- TEST_FACTOR1(name, Box, nom, denom, boxdiff)
-
-TEST_FACTOR(2, 1, 2, 0)
-TEST_FACTOR(4, 1, 4, 0)
-// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
-TEST_FACTOR(3by4, 3, 4, 1)
-TEST_FACTOR(3by8, 3, 8, 1)
-TEST_FACTOR(3, 1, 3, 0)
-#undef TEST_FACTOR1
-#undef TEST_FACTOR
-#undef SX
-#undef DX
-
-TEST_F(LibYUVScaleTest, PlaneTest3x) {
- const int kSrcStride = 480;
- const int kDstStride = 160;
- const int kSize = kSrcStride * 3;
- align_buffer_page_end(orig_pixels, kSize);
- for (int i = 0; i < 480 * 3; ++i) {
- orig_pixels[i] = i;
- }
- align_buffer_page_end(dest_pixels, kDstStride);
-
- int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
- benchmark_iterations_;
- for (int i = 0; i < iterations160; ++i) {
- ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
- kFilterBilinear);
- }
-
- EXPECT_EQ(225, dest_pixels[0]);
-
- ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
- kFilterNone);
-
- EXPECT_EQ(225, dest_pixels[0]);
-
- free_aligned_buffer_page_end(dest_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest4x) {
- const int kSrcStride = 640;
- const int kDstStride = 160;
- const int kSize = kSrcStride * 4;
- align_buffer_page_end(orig_pixels, kSize);
- for (int i = 0; i < 640 * 4; ++i) {
- orig_pixels[i] = i;
- }
- align_buffer_page_end(dest_pixels, kDstStride);
-
- int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
- benchmark_iterations_;
- for (int i = 0; i < iterations160; ++i) {
- ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
- kFilterBilinear);
- }
-
- EXPECT_EQ(66, dest_pixels[0]);
-
- ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
- kFilterNone);
-
- EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
-
- free_aligned_buffer_page_end(dest_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
- const int kSize = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_pixels, kSize);
- for (int i = 0; i < kSize; ++i) {
- orig_pixels[i] = i;
- }
- align_buffer_page_end(dest_opt_pixels, kSize);
- align_buffer_page_end(dest_c_pixels, kSize);
-
- MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
- dest_c_pixels, benchmark_height_, benchmark_height_,
- benchmark_width_, kFilterNone);
- MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
-
- for (int i = 0; i < benchmark_iterations_; ++i) {
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
- benchmark_height_, dest_opt_pixels, benchmark_height_,
- benchmark_height_, benchmark_width_, kFilterNone);
- }
-
- for (int i = 0; i < kSize; ++i) {
- EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
- }
-
- free_aligned_buffer_page_end(dest_c_pixels);
- free_aligned_buffer_page_end(dest_opt_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
- const int kSize = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_pixels, kSize);
- for (int i = 0; i < kSize; ++i) {
- orig_pixels[i] = i;
- }
- align_buffer_page_end(dest_opt_pixels, kSize);
- align_buffer_page_end(dest_c_pixels, kSize);
-
- MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
- dest_c_pixels, benchmark_height_, benchmark_height_,
- benchmark_width_, kFilterBilinear);
- MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
-
- for (int i = 0; i < benchmark_iterations_; ++i) {
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
- benchmark_height_, dest_opt_pixels, benchmark_height_,
- benchmark_height_, benchmark_width_, kFilterBilinear);
- }
-
- for (int i = 0; i < kSize; ++i) {
- EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
- }
-
- free_aligned_buffer_page_end(dest_c_pixels);
- free_aligned_buffer_page_end(dest_opt_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-// Intent is to test 200x50 to 50x200 but width and height can be parameters.
-TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
- const int kSize = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_pixels, kSize);
- for (int i = 0; i < kSize; ++i) {
- orig_pixels[i] = i;
- }
- align_buffer_page_end(dest_opt_pixels, kSize);
- align_buffer_page_end(dest_c_pixels, kSize);
-
- MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
- dest_c_pixels, benchmark_height_, benchmark_height_,
- benchmark_width_, kFilterBox);
- MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
-
- for (int i = 0; i < benchmark_iterations_; ++i) {
- ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
- benchmark_height_, dest_opt_pixels, benchmark_height_,
- benchmark_height_, benchmark_width_, kFilterBox);
- }
-
- for (int i = 0; i < kSize; ++i) {
- EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
- }
-
- free_aligned_buffer_page_end(dest_c_pixels);
- free_aligned_buffer_page_end(dest_opt_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
- align_buffer_page_end(orig_pixels, 3);
- align_buffer_page_end(dst_pixels, 3);
-
- // Pad the 1x1 byte image with invalid values before and after in case libyuv
- // reads outside the memory boundaries.
- orig_pixels[0] = 0;
- orig_pixels[1] = 1; // scale this pixel
- orig_pixels[2] = 2;
- dst_pixels[0] = 3;
- dst_pixels[1] = 3;
- dst_pixels[2] = 3;
-
- libyuv::ScalePlane(orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
- /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
- /* dst_width= */ 1, /* dst_height= */ 2,
- libyuv::kFilterBox);
-
- EXPECT_EQ(dst_pixels[0], 1);
- EXPECT_EQ(dst_pixels[1], 1);
- EXPECT_EQ(dst_pixels[2], 3);
-
- free_aligned_buffer_page_end(dst_pixels);
- free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
- align_buffer_page_end(orig_pixels_alloc, 3 * 2);
- align_buffer_page_end(dst_pixels_alloc, 3 * 2);
- uint16_t* orig_pixels = (uint16_t*)orig_pixels_alloc;
- uint16_t* dst_pixels = (uint16_t*)dst_pixels_alloc;
-
- // Pad the 1x1 byte image with invalid values before and after in case libyuv
- // reads outside the memory boundaries.
- orig_pixels[0] = 0;
- orig_pixels[1] = 1; // scale this pixel
- orig_pixels[2] = 2;
- dst_pixels[0] = 3;
- dst_pixels[1] = 3;
- dst_pixels[2] = 3;
-
- libyuv::ScalePlane_16(
- orig_pixels + 1, /* src_stride= */ 1, /* src_width= */ 1,
- /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
- /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
-
- EXPECT_EQ(dst_pixels[0], 1);
- EXPECT_EQ(dst_pixels[1], 1);
- EXPECT_EQ(dst_pixels[2], 3);
-
- free_aligned_buffer_page_end(dst_pixels_alloc);
- free_aligned_buffer_page_end(orig_pixels_alloc);
-}
} // namespace libyuv
diff --git a/files/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc
index 3d524bef..dab217c9 100644
--- a/files/unit_test/scale_uv_test.cc
+++ b/unit_test/scale_uv_test.cc
@@ -39,55 +39,35 @@ static int UVTestFilter(int src_width,
return 0;
}
- int i, j;
- const int b = 0; // 128 to test for padding/stride.
- int64_t src_uv_plane_size =
- (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL;
- int src_stride_uv = (b * 2 + Abs(src_width)) * 2;
+ int i;
+ int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL;
+ int src_stride_uv = Abs(src_width) * 2;
+ int64_t dst_uv_plane_size = dst_width * dst_height * 2LL;
+ int dst_stride_uv = dst_width * 2;
align_buffer_page_end(src_uv, src_uv_plane_size);
- if (!src_uv) {
- printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
- return 0;
- }
- MemRandomize(src_uv, src_uv_plane_size);
-
- int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
- int dst_stride_uv = (b * 2 + dst_width) * 2;
-
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
- if (!dst_uv_c || !dst_uv_opt) {
+
+ if (!src_uv || !dst_uv_c || !dst_uv_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
+ MemRandomize(src_uv, src_uv_plane_size);
memset(dst_uv_c, 2, dst_uv_plane_size);
- memset(dst_uv_opt, 3, dst_uv_plane_size);
-
- // Warm up both versions for consistent benchmarks.
- MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
- MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ memset(dst_uv_opt, 123, dst_uv_plane_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv,
dst_width, dst_height, f);
-
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt,
+ dst_stride_uv, dst_width, dst_height, f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
@@ -95,18 +75,11 @@ static int UVTestFilter(int src_width,
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
- // C version may be a little off from the optimized. Order of
- // operations may introduce rounding somewhere. So do a difference
- // of the buffers and look to see that the max difference isn't
- // over 2.
int max_diff = 0;
- for (i = b; i < (dst_height + b); ++i) {
- for (j = b * 2; j < (dst_width + b) * 2; ++j) {
- int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
- dst_uv_opt[(i * dst_stride_uv) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
}
}
@@ -121,28 +94,26 @@ static int UVTestFilter(int src_width,
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+#define TEST_FACTOR1(name, filter, nom, denom) \
TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
int diff = UVTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
+ EXPECT_EQ(0, diff); \
}
#if defined(ENABLE_FULL_TESTS)
-// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, 3) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
- TEST_FACTOR1(name, Box, nom, denom, 3)
+// Test a scale factor with all 4 filters. Expect exact for SIMD vs C.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, None, nom, denom) \
+ TEST_FACTOR1(name, Linear, nom, denom) \
+ TEST_FACTOR1(name, Bilinear, nom, denom) \
+ TEST_FACTOR1(name, Box, nom, denom)
#else
// Test a scale factor with Bilinear.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3)
+#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom)
#endif
TEST_FACTOR(2, 1, 2)
diff --git a/files/unit_test/testdata/arm_v7.txt b/unit_test/testdata/arm_v7.txt
index 5d7dbd04..5d7dbd04 100644
--- a/files/unit_test/testdata/arm_v7.txt
+++ b/unit_test/testdata/arm_v7.txt
diff --git a/files/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt
index dd465272..dd465272 100644
--- a/files/unit_test/testdata/juno.txt
+++ b/unit_test/testdata/juno.txt
diff --git a/files/unit_test/testdata/mips.txt b/unit_test/testdata/mips.txt
index d9f28cbf..d9f28cbf 100644
--- a/files/unit_test/testdata/mips.txt
+++ b/unit_test/testdata/mips.txt
diff --git a/files/unit_test/testdata/mips_loongson2k.txt b/unit_test/testdata/mips_loongson2k.txt
index 8a88d38f..8a88d38f 100644
--- a/files/unit_test/testdata/mips_loongson2k.txt
+++ b/unit_test/testdata/mips_loongson2k.txt
diff --git a/files/unit_test/testdata/mips_loongson3.txt b/unit_test/testdata/mips_loongson3.txt
index 1f540b12..1f540b12 100644
--- a/files/unit_test/testdata/mips_loongson3.txt
+++ b/unit_test/testdata/mips_loongson3.txt
diff --git a/files/unit_test/testdata/mips_loongson_mmi.txt b/unit_test/testdata/mips_loongson_mmi.txt
index 0f10b8bb..0f10b8bb 100644
--- a/files/unit_test/testdata/mips_loongson_mmi.txt
+++ b/unit_test/testdata/mips_loongson_mmi.txt
diff --git a/files/unit_test/testdata/mips_msa.txt b/unit_test/testdata/mips_msa.txt
index ac930615..ac930615 100644
--- a/files/unit_test/testdata/mips_msa.txt
+++ b/unit_test/testdata/mips_msa.txt
diff --git a/unit_test/testdata/riscv64.txt b/unit_test/testdata/riscv64.txt
new file mode 100644
index 00000000..fbb4200f
--- /dev/null
+++ b/unit_test/testdata/riscv64.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imac
+mmu : sv48 \ No newline at end of file
diff --git a/unit_test/testdata/riscv64_rvv.txt b/unit_test/testdata/riscv64_rvv.txt
new file mode 100644
index 00000000..af1b3f36
--- /dev/null
+++ b/unit_test/testdata/riscv64_rvv.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv
+mmu : sv48 \ No newline at end of file
diff --git a/unit_test/testdata/riscv64_rvv_zvfh.txt b/unit_test/testdata/riscv64_rvv_zvfh.txt
new file mode 100644
index 00000000..c416c1af
--- /dev/null
+++ b/unit_test/testdata/riscv64_rvv_zvfh.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv_zfh_zvfh
+mmu : sv48 \ No newline at end of file
diff --git a/files/unit_test/testdata/tegra3.txt b/unit_test/testdata/tegra3.txt
index d1b09f6b..d1b09f6b 100644
--- a/files/unit_test/testdata/tegra3.txt
+++ b/unit_test/testdata/tegra3.txt
diff --git a/files/unit_test/testdata/test0.jpg b/unit_test/testdata/test0.jpg
index f4461a81..f4461a81 100644
--- a/files/unit_test/testdata/test0.jpg
+++ b/unit_test/testdata/test0.jpg
Binary files differ
diff --git a/files/unit_test/testdata/test1.jpg b/unit_test/testdata/test1.jpg
index a0210e9d..a0210e9d 100644
--- a/files/unit_test/testdata/test1.jpg
+++ b/unit_test/testdata/test1.jpg
Binary files differ
diff --git a/files/unit_test/testdata/test2.jpg b/unit_test/testdata/test2.jpg
index 816ca767..816ca767 100644
--- a/files/unit_test/testdata/test2.jpg
+++ b/unit_test/testdata/test2.jpg
Binary files differ
diff --git a/files/unit_test/testdata/test3.jpg b/unit_test/testdata/test3.jpg
index 792d91dc..792d91dc 100644
--- a/files/unit_test/testdata/test3.jpg
+++ b/unit_test/testdata/test3.jpg
Binary files differ
diff --git a/files/unit_test/testdata/test4.jpg b/unit_test/testdata/test4.jpg
index 1ef41668..1ef41668 100644
--- a/files/unit_test/testdata/test4.jpg
+++ b/unit_test/testdata/test4.jpg
Binary files differ
diff --git a/files/unit_test/unit_test.cc b/unit_test/unit_test.cc
index 61145a46..239d5b92 100644
--- a/files/unit_test/unit_test.cc
+++ b/unit_test/unit_test.cc
@@ -88,6 +88,11 @@ int TestCpuEnv(int cpu_info) {
cpu_info &= ~libyuv::kCpuHasLASX;
}
#endif
+#if defined(__riscv) && defined(__linux__)
+ if (TestEnv("LIBYUV_DISABLE_RVV")) {
+ cpu_info &= ~libyuv::kCpuHasRVV;
+ }
+#endif
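This extends the existing LIBYUV_DISABLE_* convention to RISC-V, so RVV can be switched off at run time by setting LIBYUV_DISABLE_RVV in the environment before launching the test binary. A minimal sketch of the gating idea, assuming TestEnv simply reports whether the variable is set (the real helper is defined earlier in unit_test.cc):

    #include <stdlib.h>

    // Hypothetical stand-in for TestEnv: nonzero when the variable is set.
    static int TestEnvSketch(const char* name) {
      return getenv(name) != NULL;
    }
    // Then: if (TestEnvSketch("LIBYUV_DISABLE_RVV")) cpu_info &= ~libyuv::kCpuHasRVV;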
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86))
@@ -139,11 +144,14 @@ int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) {
cpu_info &= ~libyuv::kCpuHasAVX512VBITALG;
}
- if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) {
- cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ;
+ if (TestEnv("LIBYUV_DISABLE_AVX10")) {
+ cpu_info &= ~libyuv::kCpuHasAVX10;
+ }
+ if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) {
+ cpu_info &= ~libyuv::kCpuHasAVXVNNI;
}
- if (TestEnv("LIBYUV_DISABLE_GFNI")) {
- cpu_info &= ~libyuv::kCpuHasGFNI;
+ if (TestEnv("LIBYUV_DISABLE_AVXVNNIINT8")) {
+ cpu_info &= ~libyuv::kCpuHasAVXVNNIINT8;
}
#endif
if (TestEnv("LIBYUV_DISABLE_ASM")) {
diff --git a/files/unit_test/unit_test.h b/unit_test/unit_test.h
index 0a8df4d2..99cc8d19 100644
--- a/files/unit_test/unit_test.h
+++ b/unit_test/unit_test.h
@@ -11,10 +11,10 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
+#include <stddef.h> // For NULL
#ifdef _WIN32
#include <windows.h>
#else
-#include <sys/resource.h>
#include <sys/time.h>
#endif
@@ -77,7 +77,18 @@ static inline bool SizeValid(int src_width,
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
- var = 0
+ var = NULL
+
+#define align_buffer_page_end_16(var, size) \
+ uint8_t* var##_mem = \
+ reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
+ uint16_t* var = reinterpret_cast<uint16_t*>( \
+ (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
+ ~63)
+
+#define free_aligned_buffer_page_end_16(var) \
+ free(var##_mem); \
+ var = NULL
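The 16 bit variant mirrors the existing 8 bit macro: the allocation is rounded up to whole 4096-byte pages and the returned pointer is placed, 64-byte aligned, so the payload ends as close as possible to the final page boundary, turning out-of-bounds reads into page faults the tests can catch. A worked example, assuming size = 100 uint16_t elements (200 bytes) and a page-aligned allocation:

    alloc = (200 + 4095 + 63) & ~4095 = 4096       // whole pages
    var   = (mem + 4096 - 200) & ~63 = mem + 3840  // 64-byte aligned; payload
                                                   // ends 56 bytes before the
                                                   // end of the allocation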
#ifdef WIN32
static inline double get_time() {
diff --git a/files/unit_test/video_common_test.cc b/unit_test/video_common_test.cc
index 36728ea9..36728ea9 100644
--- a/files/unit_test/video_common_test.cc
+++ b/unit_test/video_common_test.cc
diff --git a/files/util/Makefile b/util/Makefile
index 40e74b65..40e74b65 100644
--- a/files/util/Makefile
+++ b/util/Makefile
diff --git a/files/util/color.cc b/util/color.cc
index 8c3bbefd..8c3bbefd 100644
--- a/files/util/color.cc
+++ b/util/color.cc
diff --git a/files/util/compare.cc b/util/compare.cc
index a16613ee..a16613ee 100644
--- a/files/util/compare.cc
+++ b/util/compare.cc
diff --git a/files/util/cpuid.c b/util/cpuid.c
index b618bb10..c07e6e95 100644
--- a/files/util/cpuid.c
+++ b/util/cpuid.c
@@ -21,8 +21,9 @@ using namespace libyuv;
int main(int argc, const char* argv[]) {
int cpu_flags = TestCpuFlag(-1);
int has_arm = TestCpuFlag(kCpuHasARM);
- int has_mips = TestCpuFlag(kCpuHasMIPS);
+ int has_riscv = TestCpuFlag(kCpuHasRISCV);
int has_x86 = TestCpuFlag(kCpuHasX86);
+ int has_mips = TestCpuFlag(kCpuHasMIPS);
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
(void)argc;
(void)argv;
@@ -62,24 +63,28 @@ int main(int argc, const char* argv[]) {
model, model);
}
#endif
- printf("Cpu Flags %x\n", cpu_flags);
- printf("Has ARM %x\n", has_arm);
- printf("Has MIPS %x\n", has_mips);
- printf("Has X86 %x\n", has_x86);
- printf("Has LOONGARCH %x\n", has_loongarch);
+ printf("Cpu Flags 0x%x\n", cpu_flags);
if (has_arm) {
int has_neon = TestCpuFlag(kCpuHasNEON);
- printf("Has NEON %x\n", has_neon);
+ printf("Has ARM 0x%x\n", has_arm);
+ printf("Has NEON 0x%x\n", has_neon);
+ }
+ if (has_riscv) {
+ int has_rvv = TestCpuFlag(kCpuHasRVV);
+ printf("Has RISCV 0x%x\n", has_riscv);
+ printf("Has RVV 0x%x\n", has_rvv);
}
if (has_mips) {
int has_msa = TestCpuFlag(kCpuHasMSA);
- printf("Has MSA %x\n", has_msa);
+ printf("Has MIPS 0x%x\n", has_mips);
+ printf("Has MSA 0x%x\n", has_msa);
}
if (has_loongarch) {
int has_lsx = TestCpuFlag(kCpuHasLSX);
- printf("Has LSX %x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
- printf("Has LASX %x\n", has_lasx);
+ printf("Has LOONGARCH 0x%x\n", has_loongarch);
+ printf("Has LSX 0x%x\n", has_lsx);
+ printf("Has LASX 0x%x\n", has_lasx);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
@@ -91,31 +96,34 @@ int main(int argc, const char* argv[]) {
int has_erms = TestCpuFlag(kCpuHasERMS);
int has_fma3 = TestCpuFlag(kCpuHasFMA3);
int has_f16c = TestCpuFlag(kCpuHasF16C);
- int has_gfni = TestCpuFlag(kCpuHasGFNI);
int has_avx512bw = TestCpuFlag(kCpuHasAVX512BW);
int has_avx512vl = TestCpuFlag(kCpuHasAVX512VL);
int has_avx512vnni = TestCpuFlag(kCpuHasAVX512VNNI);
int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI);
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
- int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
- printf("Has SSE2 %x\n", has_sse2);
- printf("Has SSSE3 %x\n", has_ssse3);
- printf("Has SSE4.1 %x\n", has_sse41);
- printf("Has SSE4.2 %x\n", has_sse42);
- printf("Has AVX %x\n", has_avx);
- printf("Has AVX2 %x\n", has_avx2);
- printf("Has ERMS %x\n", has_erms);
- printf("Has FMA3 %x\n", has_fma3);
- printf("Has F16C %x\n", has_f16c);
- printf("Has GFNI %x\n", has_gfni);
- printf("Has AVX512BW %x\n", has_avx512bw);
- printf("Has AVX512VL %x\n", has_avx512vl);
- printf("Has AVX512VNNI %x\n", has_avx512vnni);
- printf("Has AVX512VBMI %x\n", has_avx512vbmi);
- printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2);
- printf("Has AVX512VBITALG %x\n", has_avx512vbitalg);
- printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq);
+ int has_avx10 = TestCpuFlag(kCpuHasAVX10);
+ int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
+ int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
+ printf("Has X86 0x%x\n", has_x86);
+ printf("Has SSE2 0x%x\n", has_sse2);
+ printf("Has SSSE3 0x%x\n", has_ssse3);
+ printf("Has SSE4.1 0x%x\n", has_sse41);
+ printf("Has SSE4.2 0x%x\n", has_sse42);
+ printf("Has AVX 0x%x\n", has_avx);
+ printf("Has AVX2 0x%x\n", has_avx2);
+ printf("Has ERMS 0x%x\n", has_erms);
+ printf("Has FMA3 0x%x\n", has_fma3);
+ printf("Has F16C 0x%x\n", has_f16c);
+ printf("Has AVX512BW 0x%x\n", has_avx512bw);
+ printf("Has AVX512VL 0x%x\n", has_avx512vl);
+ printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
+ printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
+ printf("Has AVX10 0x%x\n", has_avx10);
+ printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
+ printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
}
return 0;
}
diff --git a/files/util/i444tonv12_eg.cc b/util/i444tonv12_eg.cc
index 0fcb4095..0fcb4095 100644
--- a/files/util/i444tonv12_eg.cc
+++ b/util/i444tonv12_eg.cc
diff --git a/files/util/psnr.cc b/util/psnr.cc
index c7bee7f9..c7bee7f9 100644
--- a/files/util/psnr.cc
+++ b/util/psnr.cc
diff --git a/files/util/psnr.h b/util/psnr.h
index aac128cb..aac128cb 100644
--- a/files/util/psnr.h
+++ b/util/psnr.h
diff --git a/files/util/psnr_main.cc b/util/psnr_main.cc
index 8b9fd972..8b9fd972 100644
--- a/files/util/psnr_main.cc
+++ b/util/psnr_main.cc
diff --git a/files/util/ssim.cc b/util/ssim.cc
index 096fbcf0..096fbcf0 100644
--- a/files/util/ssim.cc
+++ b/util/ssim.cc
diff --git a/files/util/ssim.h b/util/ssim.h
index a855f1d1..a855f1d1 100644
--- a/files/util/ssim.h
+++ b/util/ssim.h
diff --git a/files/util/yuvconstants.c b/util/yuvconstants.c
index 037e0824..4e5185af 100644
--- a/files/util/yuvconstants.c
+++ b/util/yuvconstants.c
@@ -43,9 +43,10 @@
// #define BR (-VR * 128 + YB)
int main(int argc, const char* argv[]) {
- if (argc < 2) {
- printf("yuvconstants Kr Kb\n");
- printf(" MC BT KR = 0.2126; KB = 0.0722\n");
+ if (argc < 3) {
+ printf("yuvconstants [KR] [KB]\n");
+ printf(" e.g. yuvconstants 0.2126 0.0722\n");
+ printf(" MC BT KR KB\n");
printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n");
printf(" 4 FCC KR = 0.30; KB = 0.11\n");
printf(" 6 BT.601 KR = 0.299; KB = 0.114\n");
@@ -53,8 +54,8 @@ int main(int argc, const char* argv[]) {
printf(" 9 BT.2020 KR = 0.2627; KB = 0.0593\n");
return -1;
}
- float kr = atof(argv[1]);
- float kb = atof(argv[2]);
+ float kr = (float)atof(argv[1]);
+ float kb = (float)atof(argv[2]);
float kg = 1 - kr - kb;
float vr = 2 * (1 - kr);
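For the BT.709 primaries listed in the help text (KR = 0.2126, KB = 0.0722), the derived terms work out to:

    kg = 1 - 0.2126 - 0.0722 = 0.7152
    vr = 2 * (1 - 0.2126)    = 1.5748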
diff --git a/files/util/yuvconvert.cc b/util/yuvconvert.cc
index 332699e3..93b52668 100644
--- a/files/util/yuvconvert.cc
+++ b/util/yuvconvert.cc
@@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) {
}
// Parse PYUV format. ie name.1920x800_24Hz_P420.yuv
-bool ExtractResolutionFromFilename(const char* name,
- int* width_ptr,
- int* height_ptr) {
+static bool ExtractResolutionFromFilename(const char* name,
+ int* width_ptr,
+ int* height_ptr) {
// Isolate the .width_height. section of the filename by searching for a
// dot or underscore followed by a digit.
for (int i = 0; name[i]; ++i) {
@@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name,
return false;
}
-void PrintHelp(const char* program) {
+static void PrintHelp(const char* program) {
printf("%s [-options] src_argb.raw dst_yuv.raw\n", program);
printf(
" -s <width> <height> .... specify source resolution. "
@@ -78,7 +78,7 @@ void PrintHelp(const char* program) {
exit(0);
}
-void ParseOptions(int argc, const char* argv[]) {
+static void ParseOptions(int argc, const char* argv[]) {
if (argc <= 1) {
PrintHelp(argv[0]);
}
diff --git a/files/winarm.mk b/winarm.mk
index b0a344ae..b0a344ae 100644
--- a/files/winarm.mk
+++ b/winarm.mk