author    Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-09-06 15:20:03 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-09-06 15:20:03 +0000
commit    35b4f9c093f6de959ce95706fe5afa82b94f3aed (patch)
tree      318b601d40604f7933f7ff0b364bcea559c6d35b
parent    61206b6b77e25faa5a07ffc7822a437487c3b996 (diff)
parent    dd06f86b98527b6b6425ea679bea5cc347f5afb4 (diff)
download  libyuv-aml_con_341511080.tar.gz

Snap for 10768428 from dd06f86b98527b6b6425ea679bea5cc347f5afb4 to mainline-conscrypt-release
Refs: aml_con_341614000, aml_con_341511080, aml_con_341410300, aml_con_341310090, aml_con_341110000, android14-mainline-conscrypt-release
Change-Id: I5065e2135cde11cc68e8a06cc1b8a158b49afb3e
-rw-r--r--  METADATA                                        |     4
-rw-r--r--  README.version                                  |     2
-rw-r--r--  files/.gn                                       |     4
-rw-r--r--  files/.vpython3                                 |     4
-rw-r--r--  files/Android.bp                                |     1
-rw-r--r--  files/BUILD.gn                                  |    22
-rw-r--r--  files/CMakeLists.txt                            |    22
-rw-r--r--  files/DEPS                                      |  1091
-rw-r--r--  files/README.chromium                           |     2
-rw-r--r--  files/README.md                                 |     1
-rw-r--r--  files/build_overrides/build.gni                 |     3
-rw-r--r--  files/build_overrides/partition_alloc.gni       |    17
-rw-r--r--  files/docs/environment_variables.md             |     3
-rw-r--r--  files/docs/getting_started.md                   |    29
-rw-r--r--  files/include/libyuv/convert.h                  |   123
-rw-r--r--  files/include/libyuv/convert_argb.h             |   126
-rw-r--r--  files/include/libyuv/convert_from_argb.h        |    51
-rw-r--r--  files/include/libyuv/cpu_id.h                   |     7
-rw-r--r--  files/include/libyuv/planar_functions.h         |    56
-rw-r--r--  files/include/libyuv/rotate.h                   |    64
-rw-r--r--  files/include/libyuv/rotate_row.h               |    45
-rw-r--r--  files/include/libyuv/row.h                      |  1077
-rw-r--r--  files/include/libyuv/scale_row.h                |    43
-rw-r--r--  files/include/libyuv/version.h                  |     2
-rw-r--r--  files/infra/config/PRESUBMIT.py                 |     2
-rw-r--r--  files/infra/config/cr-buildbucket.cfg           |   252
-rwxr-xr-x  files/infra/config/main.star                    |    20
-rw-r--r--  files/infra/config/project.cfg                  |     2
-rw-r--r--  files/infra/config/realms.cfg                   |     4
-rw-r--r--  files/libyuv.gni                                |     3
-rwxr-xr-x  files/riscv_script/prepare_toolchain_qemu.sh    |    74
-rw-r--r--  files/riscv_script/riscv-clang.cmake            |    52
-rwxr-xr-x  files/riscv_script/run_qemu.sh                  |    15
-rw-r--r--  files/source/compare.cc                         |     6
-rw-r--r--  files/source/compare_gcc.cc                     |     2
-rw-r--r--  files/source/compare_mmi.cc                     |   123
-rw-r--r--  files/source/convert.cc                         |   893
-rw-r--r--  files/source/convert_argb.cc                    |  1350
-rw-r--r--  files/source/convert_from.cc                    |    24
-rw-r--r--  files/source/convert_from_argb.cc               |  1061
-rw-r--r--  files/source/cpu_id.cc                          |   100
-rw-r--r--  files/source/mjpeg_decoder.cc                   |     4
-rw-r--r--  files/source/planar_functions.cc                |   659
-rw-r--r--  files/source/rotate.cc                          |   394
-rw-r--r--  files/source/rotate_argb.cc                     |    16
-rw-r--r--  files/source/rotate_common.cc                   |   127
-rw-r--r--  files/source/rotate_gcc.cc                      |   130
-rw-r--r--  files/source/rotate_mmi.cc                      |   291
-rw-r--r--  files/source/rotate_neon.cc                     |    40
-rw-r--r--  files/source/rotate_neon64.cc                   |    71
-rw-r--r--  files/source/row_any.cc                         |   852
-rw-r--r--  files/source/row_common.cc                      |   826
-rw-r--r--  files/source/row_gcc.cc                         |   578
-rw-r--r--  files/source/row_lasx.cc                        |   370
-rw-r--r--  files/source/row_lsx.cc                         |  1514
-rw-r--r--  files/source/row_mmi.cc                         |  7842
-rw-r--r--  files/source/row_neon.cc                        |   268
-rw-r--r--  files/source/row_neon64.cc                      |   255
-rw-r--r--  files/source/row_rvv.cc                         |   956
-rw-r--r--  files/source/row_win.cc                         |    65
-rw-r--r--  files/source/scale.cc                           |   106
-rw-r--r--  files/source/scale_any.cc                       |    16
-rw-r--r--  files/source/scale_argb.cc                      |    98
-rw-r--r--  files/source/scale_common.cc                    |   191
-rw-r--r--  files/source/scale_gcc.cc                       |     5
-rw-r--r--  files/source/scale_mmi.cc                       |  1168
-rw-r--r--  files/source/scale_neon.cc                      |    39
-rw-r--r--  files/source/scale_neon64.cc                    |    39
-rw-r--r--  files/source/scale_uv.cc                        |   142
-rwxr-xr-x  files/tools_libyuv/autoroller/roll_deps.py     |   582
-rw-r--r--  files/unit_test/convert_test.cc                 |   762
-rw-r--r--  files/unit_test/cpu_test.cc                     |   146
-rw-r--r--  files/unit_test/planar_test.cc                  |    97
-rw-r--r--  files/unit_test/rotate_argb_test.cc             |   106
-rw-r--r--  files/unit_test/rotate_test.cc                  |   363
-rw-r--r--  files/unit_test/scale_uv_test.cc                |    79
-rw-r--r--  files/unit_test/testdata/riscv64.txt            |     4
-rw-r--r--  files/unit_test/testdata/riscv64_rvv.txt        |     4
-rw-r--r--  files/unit_test/testdata/riscv64_rvv_zvfh.txt   |     4
-rw-r--r--  files/unit_test/unit_test.cc                    |     5
-rw-r--r--  files/unit_test/unit_test.h                     |    15
-rw-r--r--  files/util/cpuid.c                              |    60
-rw-r--r--  files/util/yuvconstants.c                       |    11
-rw-r--r--  files/util/yuvconvert.cc                        |    10
84 files changed, 13550 insertions(+), 12542 deletions(-)
diff --git a/METADATA b/METADATA
index bff062d8..5508de20 100644
--- a/METADATA
+++ b/METADATA
@@ -8,7 +8,7 @@ third_party {
type: GIT
value: "https://chromium.googlesource.com/libyuv/libyuv/"
}
- version: "d53f1beecdd8d959f7a3f2e19bd0bd7e7227a233"
- last_upgrade_date { year: 2022 month: 8 day: 5 }
+ version: "2a6cb7431939faba1b40d3f08883847f0cf63572"
+ last_upgrade_date { year: 2023 month: 6 day: 1 }
license_type: NOTICE
}
diff --git a/README.version b/README.version
index 5deb188e..6eb9dc8c 100644
--- a/README.version
+++ b/README.version
@@ -1,4 +1,4 @@
-Version: r1837
+Version: r1871
BugComponent: 42195
Owner: lajos
Local Modifications:
diff --git a/files/.gn b/files/.gn
index a765caa5..f9a5ee6c 100644
--- a/files/.gn
+++ b/files/.gn
@@ -34,7 +34,5 @@ exec_script_whitelist = build_dotfile_settings.exec_script_whitelist +
default_args = {
mac_sdk_min = "10.12"
-
- # https://bugs.chromium.org/p/libyuv/issues/detail?id=826
- ios_deployment_target = "10.0"
+ ios_deployment_target = "12.0"
}
diff --git a/files/.vpython3 b/files/.vpython3
index 0a9aa38b..28d819e7 100644
--- a/files/.vpython3
+++ b/files/.vpython3
@@ -76,8 +76,8 @@ wheel: <
version: "version:5.8.0.chromium.2"
>
wheel: <
- name: "infra/python/wheels/requests-py2_py3"
- version: "version:2.26.0"
+ name: "infra/python/wheels/requests-py3"
+ version: "version:2.31.0"
>
# Used by various python unit tests.
diff --git a/files/Android.bp b/files/Android.bp
index 0c46f7f1..d02b56f3 100644
--- a/files/Android.bp
+++ b/files/Android.bp
@@ -62,6 +62,7 @@ cc_library {
"source/row_msa.cc",
"source/row_neon.cc",
"source/row_neon64.cc",
+ "source/row_rvv.cc",
"source/scale.cc",
"source/scale_any.cc",
"source/scale_argb.cc",
diff --git a/files/BUILD.gn b/files/BUILD.gn
index a72ff065..adaae9d8 100644
--- a/files/BUILD.gn
+++ b/files/BUILD.gn
@@ -6,6 +6,7 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
+import("//build/config/features.gni")
import("//testing/test.gni")
import("libyuv.gni")
@@ -21,15 +22,19 @@ declare_args() {
config("libyuv_config") {
include_dirs = [ "include" ]
- if (is_android && current_cpu == "arm64") {
- ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
- }
- if (is_android && current_cpu != "arm64") {
- ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
+ if (is_android) {
+ if (target_cpu == "arm" || target_cpu == "x86" || target_cpu == "mipsel") {
+ ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
+ } else {
+ ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
+ }
}
-
+ defines = []
if (!libyuv_use_neon) {
- defines = [ "LIBYUV_DISABLE_NEON" ]
+ defines += [ "LIBYUV_DISABLE_NEON" ]
+ }
+ if (libyuv_disable_rvv) {
+ defines += [ "LIBYUV_DISABLE_RVV" ]
}
}
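
Note on the new define: libyuv_disable_rvv only injects LIBYUV_DISABLE_RVV into the preprocessor; the actual opt-out happens inside the source files. A minimal sketch of the gating pattern, mirroring how LIBYUV_DISABLE_NEON is used elsewhere in the tree (the function body is illustrative, not the real row_rvv.cc kernel):

#include <stdint.h>

// Compiled only when RVV is not disabled by the build AND the compiler
// actually targets the RISC-V vector extension.
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
#define HAS_COPYROW_RVV  // advertises the fast path to the dispatch code

void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
  // The real kernel would use RVV intrinsics (vsetvl/vle8/vse8);
  // a scalar copy stands in for it here.
  for (int i = 0; i < width; ++i) {
    dst[i] = src[i];
  }
}
#endif  // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)

Setting libyuv_disable_rvv = true in GN therefore strips every RVV path at compile time rather than selecting it away at runtime.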
@@ -129,6 +134,7 @@ static_library("libyuv_internal") {
"source/row_any.cc",
"source/row_common.cc",
"source/row_gcc.cc",
+ "source/row_rvv.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
@@ -150,7 +156,7 @@ static_library("libyuv_internal") {
configs += [ "//build/config/gcc:symbol_visibility_default" ]
}
- if (!is_ios && !libyuv_disable_jpeg) {
+ if ((!is_ios || use_blink) && !libyuv_disable_jpeg) {
defines += [ "HAVE_JPEG" ]
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
diff --git a/files/CMakeLists.txt b/files/CMakeLists.txt
index d190507b..7a4a1994 100644
--- a/files/CMakeLists.txt
+++ b/files/CMakeLists.txt
@@ -4,7 +4,7 @@
PROJECT ( YUV C CXX ) # "C" is required even for C++ projects
CMAKE_MINIMUM_REQUIRED( VERSION 2.8.12 )
-OPTION( TEST "Built unit tests" OFF )
+OPTION( UNIT_TEST "Built unit tests" OFF )
SET ( ly_base_dir ${PROJECT_SOURCE_DIR} )
SET ( ly_src_dir ${ly_base_dir}/source )
@@ -41,18 +41,24 @@ endif()
ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} )
+# this creates the yuvconstants tool
+ADD_EXECUTABLE ( yuvconstants ${ly_base_dir}/util/yuvconstants.c )
+TARGET_LINK_LIBRARIES ( yuvconstants ${ly_lib_static} )
-INCLUDE ( FindJPEG )
+find_package ( JPEG )
if (JPEG_FOUND)
include_directories( ${JPEG_INCLUDE_DIR} )
- target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
+ target_link_libraries( ${ly_lib_shared} ${JPEG_LIBRARY} )
add_definitions( -DHAVE_JPEG )
endif()
-if(TEST)
+if(UNIT_TEST)
find_library(GTEST_LIBRARY gtest)
if(GTEST_LIBRARY STREQUAL "GTEST_LIBRARY-NOTFOUND")
set(GTEST_SRC_DIR /usr/src/gtest CACHE STRING "Location of gtest sources")
+ if (CMAKE_CROSSCOMPILING)
+ set(GTEST_SRC_DIR third_party/googletest/src/googletest)
+ endif()
if(EXISTS ${GTEST_SRC_DIR}/src/gtest-all.cc)
message(STATUS "building gtest from sources in ${GTEST_SRC_DIR}")
set(gtest_sources ${GTEST_SRC_DIR}/src/gtest-all.cc)
@@ -61,7 +67,7 @@ if(TEST)
include_directories(${GTEST_SRC_DIR}/include)
set(GTEST_LIBRARY gtest)
else()
- message(FATAL_ERROR "TEST is set but unable to find gtest library")
+ message(FATAL_ERROR "UNIT_TEST is set but unable to find gtest library")
endif()
endif()
@@ -78,6 +84,12 @@ if(TEST)
if(NACL AND NACL_LIBC STREQUAL "newlib")
target_link_libraries(libyuv_unittest glibc-compat)
endif()
+
+ find_library(GFLAGS_LIBRARY gflags)
+ if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND")
+ target_link_libraries(libyuv_unittest gflags)
+ add_definitions(-DLIBYUV_USE_GFLAGS)
+ endif()
endif()
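
With the option renamed, builds that previously configured with -DTEST=ON must now pass -DUNIT_TEST=ON. As a quick smoke test that the library built here links and runs, a minimal caller might look like the following sketch (I420ToARGB is the public entry point declared in include/libyuv/convert_argb.h, which this change also touches; the 2x2 frame and gray pixel values are arbitrary):

#include <stdint.h>
#include "libyuv/convert_argb.h"

int main() {
  const int w = 2, h = 2;
  uint8_t y[4] = {16, 16, 16, 16};  // 2x2 luma plane
  uint8_t u[1] = {128};             // 1x1 chroma planes (4:2:0 subsampling)
  uint8_t v[1] = {128};
  uint8_t argb[4 * 2 * 2];          // 4 bytes per pixel
  // Strides are in bytes: w for Y, (w + 1) / 2 for U/V, 4 * w for ARGB.
  // Returns 0 on success.
  return libyuv::I420ToARGB(y, w, u, (w + 1) / 2, v, (w + 1) / 2,
                            argb, 4 * w, w, h);
}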
diff --git a/files/DEPS b/files/DEPS
index 3cf2dbe0..a7bec8d3 100644
--- a/files/DEPS
+++ b/files/DEPS
@@ -5,43 +5,62 @@ gclient_gn_args = [
vars = {
'chromium_git': 'https://chromium.googlesource.com',
- 'chromium_revision': '829c6df33dce1085a61d8fd44209fc84bbf9a6a7',
- 'gn_version': 'git_revision:6f13aaac55a977e1948910942675c69f2b4f7a94',
+ 'chromium_revision': 'd1501576384de23ddf8d8815ee7c95be2f708de5',
+ 'gn_version': 'git_revision:e3978de3e8dafb50a2b11efa784e08699a43faf8',
+ # ninja CIPD package version.
+ # https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
+ 'ninja_version': 'version:2@1.11.1.chromium.6',
+ # reclient CIPD package version
+ 'reclient_version': 're_client_version:0.107.1.0b39c4c-gomaip',
# Keep the Chromium default of generating location tags.
'generate_location_tags': True,
+
+ # By default, download the fuchsia sdk from the public sdk directory.
+ 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
+ 'fuchsia_version': 'version:12.20230530.1.1',
+ # By default, download the fuchsia images from the fuchsia GCS bucket.
+ 'fuchsia_images_bucket': 'fuchsia',
+ 'checkout_fuchsia': False,
+ # Since the images are hundreds of MB, default to only downloading the image
+ # most commonly useful for developers. Bots and developers that need to use
+ # other images can override this with additional images.
+ 'checkout_fuchsia_boot_images': "terminal.qemu-x64",
+ 'checkout_fuchsia_product_bundles': '"{checkout_fuchsia_boot_images}" != ""',
}
deps = {
'src/build':
- Var('chromium_git') + '/chromium/src/build' + '@' + 'dcea3443035f48d58193788e0bc56daca4e5db33',
+ Var('chromium_git') + '/chromium/src/build' + '@' + 'd0c2b4cf4fdd43866e066fb6722099aa8bf4ce79',
'src/buildtools':
- Var('chromium_git') + '/chromium/src/buildtools' + '@' + '075dd7e22837a69189003e4fa84499acf63188cf',
+ Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'edbefcee3d2cc45cdb0c60c2b01b673f8ba728bc',
'src/testing':
- Var('chromium_git') + '/chromium/src/testing' + '@' + 'f4e42be13265ec304b0f3085eee2b15f30f44077',
+ Var('chromium_git') + '/chromium/src/testing' + '@' + 'a13817e1ea0255a375d13aeb3bb2527bd528495b',
'src/third_party':
- Var('chromium_git') + '/chromium/src/third_party' + '@' + '42c249feeb71bc0cd184849f0509aefef599343d',
+ Var('chromium_git') + '/chromium/src/third_party' + '@' + '824e26c9fcbd00fccf6cdb712f8f127aae133042',
'src/buildtools/linux64': {
'packages': [
{
- 'package': 'gn/gn/linux-amd64',
+ 'package': 'gn/gn/linux-${{arch}}',
'version': Var('gn_version'),
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_linux',
+ 'condition': 'host_os == "linux"',
},
+
'src/buildtools/mac': {
'packages': [
{
- 'package': 'gn/gn/mac-amd64',
+ 'package': 'gn/gn/mac-${{arch}}',
'version': Var('gn_version'),
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_mac',
+ 'condition': 'host_os == "mac"',
},
+
'src/buildtools/win': {
'packages': [
{
@@ -50,43 +69,57 @@ deps = {
}
],
'dep_type': 'cipd',
- 'condition': 'checkout_win',
+ 'condition': 'host_os == "win"',
+ },
+
+ 'src/buildtools/reclient': {
+ 'packages': [
+ {
+ 'package': 'infra/rbe/client/${{platform}}',
+ 'version': Var('reclient_version'),
+ }
+ ],
+ 'dep_type': 'cipd',
},
'src/buildtools/clang_format/script':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + '99876cacf78329e5f99c244dbe42ccd1654517a0',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
'src/buildtools/third_party/libc++/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '79a2e924d96e2fc1e4b937c42efd08898fa472d7',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'f8279b01085b800724f5c5629dc365b9f040dc53',
'src/buildtools/third_party/libc++abi/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '665b74f7d1b3bb295cd6ba7d8fcec1acd3d2ac84',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '5c8dbff7a4911fe1e0af0bc1628891e4187a3c90',
'src/buildtools/third_party/libunwind/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f51a154281bdfe746c46c07cd4fb05be97f9441d',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'cd144ced35285edaa064a91561969e5b22c219b1',
'src/third_party/catapult':
- Var('chromium_git') + '/catapult.git' + '@' + '75423c310eb303d28978be892fcf7b9c2c824909',
+ Var('chromium_git') + '/catapult.git' + '@' + '9f3ef9c2eae9b1adabde88efe5dcc438ba76e205',
'src/third_party/colorama/src':
- Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
+ Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49',
+ 'src/third_party/cpu_features/src': {
+ 'url': Var('chromium_git') + '/external/github.com/google/cpu_features.git' + '@' + '936b9ab5515dead115606559502e3864958f7f6e',
+ 'condition': 'checkout_android',
+ },
'src/third_party/depot_tools':
- Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '2ffa1bde797a8127c0f72908d0bd74051fd65d0d',
+ Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '05ab73be51774f098eb580eda6e96a49e1010b1b',
'src/third_party/freetype/src':
- Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'cff026d41599945498044d2f4dcc0e610ffb6929',
+ Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef',
'src/third_party/googletest/src':
- Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'e2f3978937c0244508135f126e2617a7734a68be',
+ Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
'src/third_party/harfbuzz-ng/src':
- Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '64b29dbd5994a511acee69cb9b45ad650ef88359',
+ Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '8df5cdbcda495a582e72a7e2ce35d6106401edce',
'src/third_party/libjpeg_turbo':
- Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '02959c3ee17abacfd1339ec22ea93301292ffd56',
+ Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'aa4075f116e4312537d0d3e9dbd5e31096539f94',
'src/third_party/nasm':
- Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '9215e8e1d0fe474ffd3e16c1a07a0f97089e6224',
+ Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce',
'src/tools':
- Var('chromium_git') + '/chromium/src/tools' + '@' + '198dc879529652b39ba6e223bcc0bcad5f1facd6',
+ Var('chromium_git') + '/chromium/src/tools' + '@' + '916dfffd61cbf61075c47d7b480425d7de1483fd',
# libyuv-only dependencies (not present in Chromium).
'src/third_party/gtest-parallel':
Var('chromium_git') + '/external/webrtc/deps/third_party/gtest-parallel' + '@' + '1dad0e9f6d82ff994130b529d7d814b40eb32b0e',
'src/third_party/lss': {
- 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + '92a65a8f5d705d1928874420c8d0d15bde8c89e5',
+ 'url': Var('chromium_git') + '/linux-syscall-support.git' + '@' + 'ce877209e11aa69dcfffbd53ef90ea1d07136521',
'condition': 'checkout_android or checkout_linux',
},
@@ -101,14 +134,32 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/auto/src': {
- 'url': Var('chromium_git') + '/external/github.com/google/auto.git' + '@' + 'fe67d853d6356943dc79541c892ab6d3e6a7b61a',
- 'condition': 'checkout_android',
+
+ 'src/third_party/kotlin_stdlib': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/kotlin_stdlib',
+ 'version': 'z4_AYYz2Tw5GKikuiDLTuxxf0NJVGLkC3CVcyiIpc-gC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+ 'src/third_party/kotlinc/current': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/kotlinc',
+ 'version': 'J3BAlA7yf4corBopDhlwuT9W4jR1Z9R55KD3BUTVldQC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
},
+
'src/third_party/boringssl/src':
- 'https://boringssl.googlesource.com/boringssl.git' + '@' + '3a667d10e94186fd503966f5638e134fe9fb4080',
+ 'https://boringssl.googlesource.com/boringssl.git' + '@' + 'dd5219451c3ce26221762a15d867edf43b463bb2',
'src/base': {
- 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'e9e639622449a893a1b5e32781d072cec08ead72',
+ 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'b4c5ce6cb1a7c90de3fdddc80ed439fe87eab443',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@@ -132,19 +183,21 @@ deps = {
'dep_type': 'cipd',
},
'src/third_party/android_ndk': {
- 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '401019bf85744311b26c88ced255cd53401af8b7',
+ 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '310956bd122ec2b96049f8d7398de6b717f3452e',
'condition': 'checkout_android',
},
+
'src/third_party/androidx': {
'packages': [
{
'package': 'chromium/third_party/androidx',
- 'version': '6d8ij5pzYh29WWjPbdbAWFBJSA1nUgkWf2p6wCVZKIsC',
+ 'version': 'Wr5b9WJiFAzJcmjmvQIePIxk5IgpDl62kaGY_SiLxJEC',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_support_test_runner': {
'packages': [
{
@@ -158,16 +211,12 @@ deps = {
'src/third_party/android_sdk/public': {
'packages': [
{
- 'package': 'chromium/third_party/android_sdk/public/build-tools/31.0.0',
- 'version': 'tRoD45SCi7UleQqSV7MrMQO1_e5P8ysphkCcj6z_cCQC',
+ 'package': 'chromium/third_party/android_sdk/public/build-tools/33.0.0',
+ 'version': '-VRKr36Uw8L_iFqqo9nevIBgNMggND5iWxjidyjnCgsC',
},
{
'package': 'chromium/third_party/android_sdk/public/emulator',
- 'version': 'gMHhUuoQRKfxr-MBn3fNNXZtkAVXtOwMwT7kfx8jkIgC',
- },
- {
- 'package': 'chromium/third_party/android_sdk/public/extras',
- 'version': 'ppQ4TnqDvBHQ3lXx5KPq97egzF5X2FFyOrVHkGmiTMQC',
+ 'version': '9lGp8nTUCRRWGMnI_96HcKfzjnxEJKUcfvfwmA3wXNkC',
},
{
'package': 'chromium/third_party/android_sdk/public/patcher',
@@ -175,11 +224,15 @@ deps = {
},
{
'package': 'chromium/third_party/android_sdk/public/platform-tools',
- 'version': 'g7n_-r6yJd_SGRklujGB1wEt8iyr77FZTUJVS9w6O34C',
+ 'version': 'RSI3iwryh7URLGRgJHsCvUxj092woTPnKt4pwFcJ6L8C',
},
{
- 'package': 'chromium/third_party/android_sdk/public/platforms/android-31',
- 'version': 'lL3IGexKjYlwjO_1Ga-xwxgwbE_w-lmi2Zi1uOlWUIAC',
+ 'package': 'chromium/third_party/android_sdk/public/platforms/android-33',
+ 'version': 'eo5KvW6UVor92LwZai8Zulc624BQZoCu-yn7wa1z_YcC',
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox',
+ 'version': 'YWMYkzyxGBgVsty0GhXL1oxbY0pGXQIgFc0Rh7ZMRPYC',
},
{
'package': 'chromium/third_party/android_sdk/public/sources/android-31',
@@ -187,7 +240,7 @@ deps = {
},
{
'package': 'chromium/third_party/android_sdk/public/cmdline-tools',
- 'version': 'Ez2NWws2SJYCF6qw2O-mSCqK6424l3ZdSTpppLyVR_cC',
+ 'version': 'EWnL2r7oV5GtE9Ef7GyohyFam42wtMtEKYU4dCb3U1YC',
},
],
'condition': 'checkout_android',
@@ -207,7 +260,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/android_build_tools/aapt2',
- 'version': 'version:3.6.0-alpha03-5516695-cr0',
+ 'version': 'STY0BXlZxsEhudnlXQFed-B5UpwehcoM0sYqor6qRqsC',
},
],
'condition': 'checkout_android',
@@ -223,6 +276,16 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+ 'src/third_party/byte_buddy/android_sdk_build_tools_25_0_2': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_sdk/public/build-tools',
+ 'version': 'kwIs2vdfTm93yEP8LG5aSnchN4BVEdVxbqQtF4XpPdkC',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
'src/third_party/ced/src': {
'url': Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + 'ba412eaaacd3186085babcd901679a48863c7dd5',
'condition': 'checkout_android',
@@ -267,7 +330,7 @@ deps = {
},
'src/third_party/icu': {
- 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'bf66d373ae781a3498f2babe7b61d933dd774b82',
+ 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'a2961dc659b4ae847a9c6120718cc2517ee57d9e',
},
'src/third_party/icu4j': {
'packages': [
@@ -293,11 +356,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/jdk',
- 'version': 'PfRSnxe8Od6WU4zBXomq-zsgcJgWmm3z4gMQNB-r2QcC',
- },
- {
- 'package': 'chromium/third_party/jdk/extras',
- 'version': 'fkhuOQ3r-zKtWEdKplpo6k0vKkjl-LY_rJTmtzFCQN4C',
+ 'version': '2Of9Pe_OdO4xoAATuiLDiMVNebKTNO3WrwJGqil4RosC',
},
],
'condition': 'checkout_android',
@@ -308,22 +367,31 @@ deps = {
'condition': 'checkout_android',
},
'src/third_party/junit/src': {
- 'url': Var('chromium_git') + '/external/junit.git' + '@' + '64155f8a9babcfcf4263cf4d08253a1556e75481',
+ 'url': Var('chromium_git') + '/external/junit.git' + '@' + '05fe2a64f59127c02135be22f416e91260d6ede6',
'condition': 'checkout_android',
},
'src/third_party/libunwindstack': {
- 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '6868358481bb1e5e20d155c1084dc436c88b5e6b',
+ 'url': Var('chromium_git') + '/chromium/src/third_party/libunwindstack.git' + '@' + '4dbfa0e8c844c8e243b297bc185e54a99ff94f9e',
'condition': 'checkout_android',
},
+ 'src/third_party/ninja': {
+ 'packages': [
+ {
+ 'package': 'infra/3pp/tools/ninja/${{platform}}',
+ 'version': Var('ninja_version'),
+ }
+ ],
+ 'dep_type': 'cipd',
+ },
'src/third_party/mockito/src': {
- 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '04a2a289a4222f80ad20717c25144981210d2eac',
+ 'url': Var('chromium_git') + '/external/mockito/mockito.git' + '@' + '7c3641bcef717ffa7d765f2c86b847d0aab1aac9',
'condition': 'checkout_android',
},
'src/third_party/objenesis': {
'packages': [
{
'package': 'chromium/third_party/objenesis',
- 'version': '9e367f55e5a65781ee77bfcbaa88fb82b30e75c0',
+ 'version': 'tknDblENYi8IaJYyD6tUahUyHYZlzJ_Y74_QZSz4DpIC',
},
],
'condition': 'checkout_android',
@@ -343,7 +411,20 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/r8',
- 'version': 'Nu_mvQJe34CotIXadFlA3w732CJ9EvQGuVs4udcZedAC',
+ 'version': '4Oq32DG2vuDh7Frxj6tH5xyi77sVgBWpvvl4hwvZRR4C',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+ # This duplication is intentional, so we avoid updating the r8.jar used by
+ # dexing unless necessary, since each update invalidates all incremental
+ # dexing and unnecessarily slows down all bots.
+ 'src/third_party/r8/d8': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/r8',
+ 'version': 'PwglNZFRNPkBBXdnY9NfrZFk2ULWDTRxhV9rl2kvkpUC',
},
],
'condition': 'checkout_android',
@@ -367,7 +448,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/robolectric',
- 'version': 'iC6RDM5EH3GEAzR-1shW_Mg0FeeNE5shq1okkFfuuNQC',
+ 'version': 'hzetqh1qFI32FOgQroZvGcGdomrgVBJ6WKRnl1KFw6EC',
},
],
'condition': 'checkout_android',
@@ -377,7 +458,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/sqlite4java',
- 'version': '889660698187baa7c8b0d79f7bf58563125fbd66',
+ 'version': 'LofjKH9dgXIAJhRYCPQlMFywSwxYimrfDeBmaHc-Z5EC',
},
],
'condition': 'checkout_android',
@@ -387,7 +468,7 @@ deps = {
'packages': [
{
'package': 'chromium/third_party/turbine',
- 'version': 'Om6yIEXgJxuqghErK29h9RcMH6VaymMbxwScwXmcN6EC',
+ 'version': 'Foa7uRpVoKr4YoayCKc9EERkjpmGOE3DAUTWFLL7gKEC',
},
],
'condition': 'checkout_android',
@@ -400,1718 +481,1822 @@ deps = {
# iOS deps:
'src/ios': {
- 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '81826d980c159f949c2c7901f4dbec9a09788964',
+ 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '241921896b64f85de9a32d461462913cbff4baeb',
'condition': 'checkout_ios'
},
# Everything coming after this is automatically updated by the auto-roller.
# === ANDROID_DEPS Generated Code Start ===
-
+ # Generated by //third_party/android_deps/fetch_all.py
'src/third_party/android_deps/libs/android_arch_core_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_core_common',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_core_runtime': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_core_runtime',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_common_java8': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_common_java8',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_livedata': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_livedata_core': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_livedata_core',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_runtime': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_runtime',
- 'version': 'version:2@1.1.1.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/android_arch_lifecycle_viewmodel': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/android_arch_lifecycle_viewmodel',
- 'version': 'version:2@1.1.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/backport_util_concurrent_backport_util_concurrent',
- 'version': 'version:2@3.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/classworlds_classworlds': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/classworlds_classworlds',
- 'version': 'version:2@1.1-alpha-2.cr0',
+ 'version': 'version:2@1.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_animated_vector_drawable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_animated_vector_drawable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_appcompat_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_appcompat_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_asynclayoutinflater': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_asynclayoutinflater',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_cardview_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_cardview_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_collections': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_collections',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_coordinatorlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_coordinatorlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_cursoradapter': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_cursoradapter',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_customview': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_customview',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_design': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_design',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_documentfile': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_documentfile',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_drawerlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_drawerlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_interpolator': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_interpolator',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_loader': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_loader',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_localbroadcastmanager': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_localbroadcastmanager',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_multidex': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_multidex',
- 'version': 'version:2@1.0.0.cr0',
+ 'version': 'version:2@1.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_print': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_print',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_recyclerview_v7': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_recyclerview_v7',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_slidingpanelayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_slidingpanelayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_annotations',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_compat': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_compat',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_core_ui': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_ui',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_core_utils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_core_utils',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_fragment': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_fragment',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_media_compat': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_media_compat',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_v4': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_v4',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_support_vector_drawable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_support_vector_drawable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_swiperefreshlayout': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_swiperefreshlayout',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_transition': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_transition',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_versionedparcelable': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_versionedparcelable',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_support_viewpager': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_support_viewpager',
- 'version': 'version:2@28.0.0.cr0',
+ 'version': 'version:2@28.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_android_tools_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_android_tools_common',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs': {
+
+ 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs',
- 'version': 'version:2@1.1.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration': {
+
+ 'src/third_party/android_deps/libs/com_android_tools_sdk_common': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_desugar_jdk_libs_configuration',
- 'version': 'version:2@1.1.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_android_tools_sdk_common',
+ 'version': 'version:2@30.2.0-beta01.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api': {
+
+ 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_layoutlib_layoutlib_api',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine',
+ 'version': 'version:2@2.8.8.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_android_tools_sdk_common': {
+
+ 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_android_tools_sdk_common',
- 'version': 'version:2@30.0.0-alpha10.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine': {
+
+ 'src/third_party/android_deps/libs/com_google_android_annotations': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_github_ben_manes_caffeine_caffeine',
- 'version': 'version:2@2.8.8.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_annotations',
+ 'version': 'version:2@4.1.1.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms': {
+
+ 'src/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_github_kevinstern_software_and_algorithms',
- 'version': 'version:2@1.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_apps_common_testing_accessibility_framework_accessibility_test_framework',
+ 'version': 'version:2@4.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_datatransport_transport_api': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_datatransport_transport_api',
- 'version': 'version:2@2.2.1.cr0',
+ 'version': 'version:2@2.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@20.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_api_phone',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_auth_base',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@18.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_base': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_base',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_basement': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_basement',
- 'version': 'version:2@17.5.0.cr0',
+ 'version': 'version:2@18.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cast_framework',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_clearcut',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_cloud_messaging',
- 'version': 'version:2@16.0.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_android_gms_play_services_fido': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_fido',
- 'version': 'version:2@19.0.0-beta.cr0',
+ 'version': 'version:2@16.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_flags': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_flags',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_gcm': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_gcm',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_iid': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_iid',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_instantapps',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@18.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_location': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_location',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@19.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_phenotype',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_places_placereport',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_stats': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_stats',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_tasks': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_tasks',
- 'version': 'version:2@17.2.0.cr0',
+ 'version': 'version:2@18.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@20.1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_gms_play_services_vision_common',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@19.1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_android_material_material': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_android_material_material',
- 'version': 'version:2@1.6.0-alpha01.cr0',
+ 'version': 'version:2@1.7.0-alpha02.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
+ 'src/third_party/android_deps/libs/com_google_android_play_core_common': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core_common',
+ 'version': 'version:2@2.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/com_google_android_play_core': {
+
+ 'src/third_party/android_deps/libs/com_google_android_play_feature_delivery': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_core',
- 'version': 'version:2@1.10.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_google_android_play_feature_delivery',
+ 'version': 'version:2@2.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_auto_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_auto_common',
- 'version': 'version:2@1.1.2.cr0',
+ 'version': 'version:2@1.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_service_auto_service': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service',
- 'version': 'version:2@1.0-rc6.cr0',
+ 'version': 'version:2@1.0-rc6.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_service_auto_service_annotations',
- 'version': 'version:2@1.0-rc6.cr0',
+ 'version': 'version:2@1.0-rc6.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_auto_value_auto_value_annotations',
- 'version': 'version:2@1.7.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_code_findbugs_jformatstring': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jformatstring',
- 'version': 'version:2@3.0.0.cr0',
+ 'version': 'version:2@1.10.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_code_findbugs_jsr305': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_code_findbugs_jsr305',
- 'version': 'version:2@3.0.2.cr0',
+ 'version': 'version:2@3.0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_code_gson_gson': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_code_gson_gson',
- 'version': 'version:2@2.8.0.cr0',
+ 'version': 'version:2@2.9.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_compiler': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_compiler',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_producers': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_producers',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_dagger_dagger_spi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_dagger_dagger_spi',
- 'version': 'version:2@2.30.cr0',
+ 'version': 'version:2@2.30.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotation',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_annotations',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.18.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_check_api',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_core': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_core',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_error_prone_type_annotations',
- 'version': 'version:2@2.10.0.cr0',
+ 'version': 'version:2@2.11.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_javac': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac',
- 'version': 'version:2@9+181-r4173-1.cr0',
+ 'version': 'version:2@9+181-r4173-1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_errorprone_javac_shaded': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_errorprone_javac_shaded',
- 'version': 'version:2@9-dev-r4023-3.cr0',
+ 'version': 'version:2@9-dev-r4023-3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_annotations',
- 'version': 'version:2@16.0.0.cr0',
+ 'version': 'version:2@16.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_common': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_common',
- 'version': 'version:2@19.5.0.cr0',
+ 'version': 'version:2@19.5.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_components': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_components',
- 'version': 'version:2@16.1.0.cr0',
+ 'version': 'version:2@16.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders',
- 'version': 'version:2@16.1.0.cr0',
+ 'version': 'version:2@16.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_encoders_json',
- 'version': 'version:2@17.1.0.cr0',
+ 'version': 'version:2@17.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_iid': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid',
- 'version': 'version:2@21.0.1.cr0',
+ 'version': 'version:2@21.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_iid_interop',
- 'version': 'version:2@17.0.0.cr0',
+ 'version': 'version:2@17.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_installations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations',
- 'version': 'version:2@16.3.5.cr0',
+ 'version': 'version:2@16.3.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_installations_interop',
- 'version': 'version:2@16.0.1.cr0',
+ 'version': 'version:2@16.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_measurement_connector',
- 'version': 'version:2@18.0.0.cr0',
+ 'version': 'version:2@18.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_firebase_firebase_messaging': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_firebase_firebase_messaging',
- 'version': 'version:2@21.0.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/com_google_flatbuffers_flatbuffers_java',
- 'version': 'version:2@2.0.3.cr0',
+ 'version': 'version:2@21.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_googlejavaformat_google_java_format',
- 'version': 'version:2@1.5.cr0',
+ 'version': 'version:2@1.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_failureaccess': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_failureaccess',
- 'version': 'version:2@1.0.1.cr0',
+ 'version': 'version:2@1.0.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_guava': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava',
- 'version': 'version:2@31.0-jre.cr0',
+ 'version': 'version:2@31.1-jre.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_guava_android': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_guava_android',
- 'version': 'version:2@31.0-android.cr0',
+ 'version': 'version:2@31.1-android.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_guava_listenablefuture': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_guava_listenablefuture',
- 'version': 'version:2@1.0.cr0',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_j2objc_j2objc_annotations',
- 'version': 'version:2@1.3.cr0',
+ 'version': 'version:2@1.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_protobuf_protobuf_java': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_java',
- 'version': 'version:2@3.4.0.cr0',
+ 'version': 'version:2@3.19.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_google_protobuf_protobuf_javalite',
- 'version': 'version:2@3.13.0.cr0',
+ 'version': 'version:2@3.21.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_googlecode_java_diff_utils_diffutils',
- 'version': 'version:2@1.3.0.cr0',
+ 'version': 'version:2@1.3.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_squareup_javapoet': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_javapoet',
- 'version': 'version:2@1.13.0.cr0',
+ 'version': 'version:2@1.13.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/com_squareup_javawriter': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_javawriter',
- 'version': 'version:2@2.1.1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils',
- 'version': 'version:2@4.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api',
- 'version': 'version:2@1.3.2.cr0',
+ 'version': 'version:2@2.1.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api',
- 'version': 'version:2@1.0.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/javax_inject_javax_inject': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject',
- 'version': 'version:2@1.cr0',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
- },
- 'src/third_party/android_deps/libs/nekohtml_nekohtml': {
+
+ 'src/third_party/android_deps/libs/com_squareup_okio_okio_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/nekohtml_nekohtml',
- 'version': 'version:2@1.9.6.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm',
+ 'version': 'version:2@3.0.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/nekohtml_xercesminimal': {
+
+ 'src/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/nekohtml_xercesminimal',
- 'version': 'version:2@1.9.6.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm',
+ 'version': 'version:2@4.5.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': {
+
+ 'src/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap',
- 'version': 'version:2@0.2.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_github_java_diff_utils_java_diff_utils',
+ 'version': 'version:2@4.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/net_sf_kxml_kxml2': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/net_sf_kxml_kxml2',
- 'version': 'version:2@2.3.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_api',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_ant_ant': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_binder': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant',
- 'version': 'version:2@1.8.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_binder',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_ant_ant_launcher': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_context': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_ant_ant_launcher',
- 'version': 'version:2@1.8.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_context',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_core': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_ant_tasks',
- 'version': 'version:2@2.1.3.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_core',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_protobuf_lite',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager': {
+
+ 'src/third_party/android_deps/libs/io_grpc_grpc_stub': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_artifact_manager',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_grpc_grpc_stub',
+ 'version': 'version:2@1.49.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics': {
+
+ 'src/third_party/android_deps/libs/io_perfmark_perfmark_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_error_diagnostics',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/io_perfmark_perfmark_api',
+ 'version': 'version:2@0.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_model': {
+
+ 'src/third_party/android_deps/libs/javax_annotation_javax_annotation_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_model',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_annotation_javax_annotation_api',
+ 'version': 'version:2@1.3.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry': {
+
+ 'src/third_party/android_deps/libs/javax_annotation_jsr250_api': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_plugin_registry',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_annotation_jsr250_api',
+ 'version': 'version:2@1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_profile': {
+
+ 'src/third_party/android_deps/libs/javax_inject_javax_inject': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_profile',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/javax_inject_javax_inject',
+ 'version': 'version:2@1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_project': {
+
+ 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_project',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy',
+ 'version': 'version:2@1.14.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata': {
+
+ 'src/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_repository_metadata',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent',
+ 'version': 'version:2@1.14.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_maven_settings': {
+
+ 'src/third_party/android_deps/libs/net_ltgt_gradle_incap_incap': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_maven_settings',
- 'version': 'version:2@2.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/net_ltgt_gradle_incap_incap',
+ 'version': 'version:2@0.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file': {
+
+ 'src/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_file',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_bouncycastle_bcprov_jdk18on',
+ 'version': 'version:2@1.72.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight': {
+
+ 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_lightweight',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup',
+ 'version': 'version:2@1.2.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_http_shared',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual',
+ 'version': 'version:2@2.5.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_apache_maven_wagon_wagon_provider_api',
- 'version': 'version:2@1.0-beta-6.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual',
+ 'version': 'version:2@3.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_checker_util': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_ccil_cowan_tagsoup_tagsoup',
- 'version': 'version:2@1.2.1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_util',
+ 'version': 'version:2@3.25.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_checker_compat_qual': {
+
+ 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_compat_qual',
- 'version': 'version:2@2.5.5.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone',
+ 'version': 'version:2@3.15.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_checker_qual': {
+
+ 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_checker_qual',
- 'version': 'version:2@3.12.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations',
+ 'version': 'version:2@1.21.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone': {
+
+ 'src/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_checkerframework_dataflow_errorprone',
- 'version': 'version:2@3.15.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_conscrypt_conscrypt_openjdk_uber',
+ 'version': 'version:2@2.5.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations': {
+
+ 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_mojo_animal_sniffer_annotations',
- 'version': 'version:2@1.17.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit',
+ 'version': 'version:2@4.4.1.201607150455-r.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default': {
+
+ 'src/third_party/android_deps/libs/org_hamcrest_hamcrest': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_container_default',
- 'version': 'version:2@1.0-alpha-9-stable-1.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_hamcrest_hamcrest',
+ 'version': 'version:2@2.2.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_interpolation',
- 'version': 'version:2@1.11.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7',
+ 'version': 'version:2@1.8.20.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_codehaus_plexus_plexus_utils',
- 'version': 'version:2@1.5.15.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8',
+ 'version': 'version:2@1.8.20.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_eclipse_jgit_org_eclipse_jgit',
- 'version': 'version:2@4.4.1.201607150455-r.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_annotations': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_annotations',
- 'version': 'version:2@13.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib',
- 'version': 'version:2@1.6.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_guava',
+ 'version': 'version:2@1.6.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common': {
+
+ 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_common',
- 'version': 'version:2@1.6.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm',
+ 'version': 'version:2@0.1.0.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7': {
+
+ 'src/third_party/android_deps/libs/org_jsoup_jsoup': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk7',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_jsoup_jsoup',
+ 'version': 'version:2@1.15.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_android': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlin_kotlin_stdlib_jdk8',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_android',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_core': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_android',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm': {
+
+ 'src/third_party/android_deps/libs/org_mockito_mockito_subclass': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm',
- 'version': 'version:2@1.5.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm': {
+
+ 'src/third_party/android_deps/libs/org_objenesis_objenesis': {
'packages': [
{
- 'package': 'chromium/third_party/android_deps/libs/org_jetbrains_kotlinx_kotlinx_metadata_jvm',
- 'version': 'version:2@0.1.0.cr0',
+ 'package': 'chromium/third_party/android_deps/libs/org_objenesis_objenesis',
+ 'version': 'version:2@3.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_analysis': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_analysis',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_commons': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_commons',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_tree': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_tree',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_ow2_asm_asm_util': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_ow2_asm_asm_util',
- 'version': 'version:2@7.0.cr0',
+ 'version': 'version:2@9.5.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_pcollections_pcollections': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_pcollections_pcollections',
- 'version': 'version:2@2.1.2.cr0',
+ 'version': 'version:2@3.1.4.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_annotations': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_annotations',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_junit': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_junit',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
+ 'src/third_party/android_deps/libs/org_robolectric_nativeruntime': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
+ 'src/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/org_robolectric_nativeruntime_dist_compat',
+ 'version': 'version:2@1.0.1.cr1',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
'src/third_party/android_deps/libs/org_robolectric_pluginapi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_pluginapi',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_plugins_maven_dependency_resolver',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_resources': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_resources',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_robolectric': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_robolectric',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_sandbox': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_sandbox',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadowapi': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadowapi',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadows_framework': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_framework',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_shadows_playservices': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_shadows_playservices',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_utils': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+
'src/third_party/android_deps/libs/org_robolectric_utils_reflector': {
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_robolectric_utils_reflector',
- 'version': 'version:2@4.3.1.cr0',
+ 'version': 'version:2@4.10.3.cr1',
},
],
'condition': 'checkout_android',
@@ -2197,30 +2382,75 @@ hooks = [
'condition': 'checkout_mac',
},
{
- 'name': 'msan_chained_origins',
+ 'name': 'msan_chained_origins_focal',
+ 'pattern': '.',
+ 'condition': 'checkout_instrumented_libraries',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
+ ],
+ },
+ {
+ 'name': 'msan_no_origins_focal',
+ 'pattern': '.',
+ 'condition': 'checkout_instrumented_libraries',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
+ ],
+ },
+ {
+ 'name': 'msan_chained_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
- "--no_resume",
- "--no_auth",
- "--bucket", "chromium-instrumented-libraries",
- "-s", "src/third_party/instrumented_libraries/binaries/msan-chained-origins.tgz.sha1",
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-chained-origins-focal.tgz.sha1',
],
},
{
- 'name': 'msan_no_origins',
+ 'name': 'msan_no_origins_focal',
'pattern': '.',
'condition': 'checkout_instrumented_libraries',
'action': [ 'python3',
'src/third_party/depot_tools/download_from_google_storage.py',
- "--no_resume",
- "--no_auth",
- "--bucket", "chromium-instrumented-libraries",
- "-s", "src/third_party/instrumented_libraries/binaries/msan-no-origins.tgz.sha1",
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-instrumented-libraries',
+ '-s', 'src/third_party/instrumented_libraries/binaries/msan-no-origins-focal.tgz.sha1',
],
},
{
+ 'name': 'Download Fuchsia SDK from GCS',
+ 'pattern': '.',
+ 'condition': 'checkout_fuchsia',
+ 'action': [
+ 'python3',
+ 'src/build/fuchsia/update_sdk.py',
+ '--cipd-prefix={fuchsia_sdk_cipd_prefix}',
+ '--version={fuchsia_version}',
+ ],
+ },
+ {
+ 'name': 'Download Fuchsia system images',
+ 'pattern': '.',
+ 'condition': 'checkout_fuchsia and checkout_fuchsia_product_bundles',
+ 'action': [
+ 'python3',
+ 'src/build/fuchsia/update_product_bundles.py',
+ '{checkout_fuchsia_boot_images}',
+ ],
+ },
+ {
# Pull clang if needed or requested via GYP_DEFINES.
# Note: On Win, this should run after win_toolchain, as it may use it.
'name': 'clang',
@@ -2238,7 +2468,9 @@ hooks = [
{
'name': 'clang_format_win',
'pattern': '.',
- 'action': [ 'download_from_google_storage',
+ 'condition': 'host_os == "win"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=win32',
'--no_auth',
@@ -2247,21 +2479,38 @@ hooks = [
],
},
{
- 'name': 'clang_format_mac',
+ 'name': 'clang_format_mac_x64',
'pattern': '.',
- 'action': [ 'download_from_google_storage',
+ 'condition': 'host_os == "mac" and host_cpu == "x64"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=darwin',
'--no_auth',
'--bucket', 'chromium-clang-format',
- '-s', 'src/buildtools/mac/clang-format.sha1',
+ '-s', 'src/buildtools/mac/clang-format.x64.sha1',
+ '-o', 'src/buildtools/mac/clang-format',
],
},
{
+ 'name': 'clang_format_mac_arm64',
+ 'pattern': '.',
+ 'condition': 'host_os == "mac" and host_cpu == "arm64"',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
+ '--no_resume',
+ '--no_auth',
+ '--bucket', 'chromium-clang-format',
+ '-s', 'src/buildtools/mac/clang-format.arm64.sha1',
+ '-o', 'src/buildtools/mac/clang-format',
+ ],
+ },
+ {
'name': 'clang_format_linux',
'pattern': '.',
'condition': 'host_os == "linux"',
- 'action': [ 'download_from_google_storage',
+ 'action': [ 'python3',
+ 'src/third_party/depot_tools/download_from_google_storage.py',
'--no_resume',
'--platform=linux*',
'--no_auth',
@@ -2304,18 +2553,6 @@ hooks = [
],
},
{
- # We used to use src as a CIPD root. We moved it to a different directory
- # in crrev.com/c/930178 but left the clobber here to ensure that that CL
- # could be reverted safely. This can be safely removed once crbug.com/794764
- # is resolved.
- 'name': 'Android Clobber Deprecated CIPD Root',
- 'pattern': '.',
- 'condition': 'checkout_android',
- 'action': ['src/build/cipd/clobber_cipd_root.py',
- '--root', 'src',
- ],
- },
- {
'name': 'Generate component metadata for tests',
'pattern': '.',
'action': [
diff --git a/files/README.chromium b/files/README.chromium
index 3f68e21e..880191e4 100644
--- a/files/README.chromium
+++ b/files/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1837
+Version: 1871
License: BSD
License File: LICENSE
diff --git a/files/README.md b/files/README.md
index db70b7f0..95eeb04c 100644
--- a/files/README.md
+++ b/files/README.md
@@ -7,6 +7,7 @@
* Optimized for SSSE3/AVX2 on x86/x64.
* Optimized for Neon on Arm.
* Optimized for MSA on Mips.
+* Optimized for RVV on RISC-V.
### Development
diff --git a/files/build_overrides/build.gni b/files/build_overrides/build.gni
index c8490313..d9d01d51 100644
--- a/files/build_overrides/build.gni
+++ b/files/build_overrides/build.gni
@@ -13,6 +13,9 @@ build_with_chromium = false
# Some non-Chromium builds don't support building java targets.
enable_java_templates = true
+# Enables assertions on safety checks in libc++.
+enable_safe_libcxx = true
+
# Allow using custom suppressions files (currently not used by libyuv).
asan_suppressions_file = "//build/sanitizers/asan_suppressions.cc"
lsan_suppressions_file = "//build/sanitizers/lsan_suppressions.cc"
diff --git a/files/build_overrides/partition_alloc.gni b/files/build_overrides/partition_alloc.gni
new file mode 100644
index 00000000..dcf8ac2d
--- /dev/null
+++ b/files/build_overrides/partition_alloc.gni
@@ -0,0 +1,17 @@
+# Copyright 2022 The LibYuv Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+# Use the default values for PartitionAlloc as a standalone library, taken
+# from base/allocator/partition_allocator/build_overrides/partition_alloc.gni
+use_partition_alloc_as_malloc_default = false
+use_allocator_shim_default = false
+enable_backup_ref_ptr_support_default = false
+enable_mte_checked_ptr_support_default = false
+put_ref_count_in_previous_slot_default = false
+enable_backup_ref_ptr_slow_checks_default = false
+enable_dangling_raw_ptr_checks_default = false
diff --git a/files/docs/environment_variables.md b/files/docs/environment_variables.md
index dd5d59fb..4eb09659 100644
--- a/files/docs/environment_variables.md
+++ b/files/docs/environment_variables.md
@@ -40,6 +40,9 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_LSX
LIBYUV_DISABLE_LASX
+## RISCV CPUs
+ LIBYUV_DISABLE_RVV
+
# Test Width/Height/Repeat
The unittests default to a small image (128x72) to run fast. The size can be set by environment variable to test specific resolutions.
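
These variables are consulted when libyuv first detects the CPU, so they must be set before the first call into the library. A minimal C sketch (assuming a POSIX environment for setenv; TestCpuFlag and kCpuHasRVV are real libyuv symbols):

    /* Disable the RVV paths via the environment before any libyuv call,
       then confirm the flag is gone. */
    #include <stdlib.h>
    #include "libyuv/cpu_id.h"

    int RvvDisabled(void) {
      setenv("LIBYUV_DISABLE_RVV", "1", 1); /* must precede first libyuv call */
      return !TestCpuFlag(kCpuHasRVV);
    }
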
diff --git a/files/docs/getting_started.md b/files/docs/getting_started.md
index 15b19ab2..b19f0009 100644
--- a/files/docs/getting_started.md
+++ b/files/docs/getting_started.md
@@ -220,6 +220,35 @@ Install cmake: http://www.cmake.org/
make -j4
make package
+## Building RISC-V target with cmake
+
+### Prerequisite: build risc-v clang toolchain and qemu
+
+If you don't have a prebuilt clang toolchain and riscv64 QEMU, run the script below to download the sources and build them.
+
+ ./riscv_script/prepare_toolchain_qemu.sh
+
+After the script finishes, clang and QEMU are built in `build-toolchain-qemu/riscv-clang/` and `build-toolchain-qemu/riscv-qemu/`.
+
+### Cross-compile for RISC-V target
+ cmake -B out/Release/ -DUNIT_TEST=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \
+ -DTOOLCHAIN_PATH={TOOLCHAIN_PATH} \
+ -DUSE_RVV=ON .
+ cmake --build out/Release/
+
+
+### Run on QEMU
+
+#### Run libyuv_unittest on QEMU
+ cd out/Release/
+ USE_RVV=ON \
+ TOOLCHAIN_PATH={TOOLCHAIN_PATH} \
+ QEMU_PREFIX_PATH={QEMU_PREFIX_PATH} \
+ ../../riscv_script/run_qemu.sh libyuv_unittest
+
+
## Setup for Arm Cross compile
See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html
diff --git a/files/include/libyuv/convert.h b/files/include/libyuv/convert.h
index 46d37159..88619a4f 100644
--- a/files/include/libyuv/convert.h
+++ b/files/include/libyuv/convert.h
@@ -151,6 +151,33 @@ int MM21ToI420(const uint8_t* src_y,
int width,
int height);
+// Convert MM21 to YUY2.
+LIBYUV_API
+int MM21ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height);
+
+// Convert MT2T to P010.
+// Note that src_y and src_uv point to packed 10-bit values, so each source
+// row occupies 10 / 8 times as many bytes as it has samples. For this
+// reason, src_stride_y and src_stride_uv are given in bytes.
+LIBYUV_API
+int MT2TToP010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
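
A sketch of the stride math implied by the MT2TToP010 comment above. The 10 / 8 source stride follows from the packed 10-bit layout; treating the destination P010 strides as uint16 element counts is an assumption, and the helper name is hypothetical:

    #include "libyuv/convert.h"

    int UnpackMt2t(const uint8_t* src_y, const uint8_t* src_uv,
                   uint16_t* dst_y, uint16_t* dst_uv,
                   int width, int height) {
      int src_stride = width * 10 / 8; /* bytes, per the note above */
      return MT2TToP010(src_y, src_stride, src_uv, src_stride,
                        dst_y, width, dst_uv, width, width, height);
    }
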
@@ -272,6 +299,23 @@ int I210ToI422(const uint16_t* src_y,
int width,
int height);
+#define H410ToH420 I410ToI420
+LIBYUV_API
+int I410ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define H410ToH444 I410ToI444
LIBYUV_API
int I410ToI444(const uint16_t* src_y,
@@ -323,6 +367,23 @@ int I212ToI422(const uint16_t* src_y,
int width,
int height);
+#define H212ToH420 I212ToI420
+LIBYUV_API
+int I212ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define H412ToH444 I412ToI444
LIBYUV_API
int I412ToI444(const uint16_t* src_y,
@@ -340,6 +401,23 @@ int I412ToI444(const uint16_t* src_y,
int width,
int height);
+#define H412ToH420 I412ToI420
+LIBYUV_API
+int I412ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
#define I412ToI012 I410ToI010
#define H410ToH010 I410ToI010
#define H412ToH012 I410ToI010
@@ -560,6 +638,36 @@ int NV16ToNV24(const uint8_t* src_y,
int width,
int height);
+// Convert P010 to I010.
+LIBYUV_API
+int P010ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
+// Convert P012 to I012.
+LIBYUV_API
+int P012ToI012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// Convert P010 to P410.
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
@@ -677,6 +785,21 @@ int ARGBToI420(const uint8_t* src_argb,
int width,
int height);
+// Convert ARGB to I420 with alpha.
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height);
+
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra,
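
A minimal sketch of calling the ARGBToI420Alpha declared above. Buffer sizes follow the usual I420 layout with halved chroma planes; the wrapper name is hypothetical and malloc error handling is omitted:

    #include <stdlib.h>
    #include "libyuv/convert.h"

    int SplitArgbWithAlpha(const uint8_t* argb, int width, int height) {
      int half_w = (width + 1) / 2;
      int half_h = (height + 1) / 2;
      uint8_t* y = malloc((size_t)width * height);
      uint8_t* u = malloc((size_t)half_w * half_h);
      uint8_t* v = malloc((size_t)half_w * half_h);
      uint8_t* a = malloc((size_t)width * height);
      int ret = ARGBToI420Alpha(argb, width * 4, y, width, u, half_w,
                                v, half_w, a, width, width, height);
      free(y); free(u); free(v); free(a);
      return ret; /* 0 on success, following libyuv convention */
    }
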
diff --git a/files/include/libyuv/convert_argb.h b/files/include/libyuv/convert_argb.h
index f66d20ce..35eeac9b 100644
--- a/files/include/libyuv/convert_argb.h
+++ b/files/include/libyuv/convert_argb.h
@@ -67,6 +67,8 @@ LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full
I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
+#define I012ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \
+ I012ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k)
#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n)
#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \
@@ -404,6 +406,32 @@ int U444ToABGR(const uint8_t* src_y,
int width,
int height);
+// Convert I444 to RGB24.
+LIBYUV_API
+int I444ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert I444 to RAW.
+LIBYUV_API
+int I444ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
// Convert I010 to ARGB.
LIBYUV_API
int I010ToARGB(const uint16_t* src_y,
@@ -1312,6 +1340,32 @@ int J420ToRAW(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB24.
+LIBYUV_API
+int I422ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height);
+
+// Convert I422 to RAW.
+LIBYUV_API
+int I422ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height);
+
LIBYUV_API
int I420ToRGB565(const uint8_t* src_y,
int src_stride_y,
@@ -1495,6 +1549,20 @@ int I444ToARGBMatrix(const uint8_t* src_y,
int width,
int height);
+// Convert I444 to RGB24 with matrix.
+LIBYUV_API
+int I444ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert 10 bit 420 YUV to ARGB with matrix.
LIBYUV_API
int I010ToAR30Matrix(const uint16_t* src_y,
@@ -1893,6 +1961,20 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB24 with matrix.
+LIBYUV_API
+int I422ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert I420 to RGB565 with specified color matrix.
LIBYUV_API
int I420ToRGB565Matrix(const uint8_t* src_y,
@@ -1907,6 +1989,20 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
int width,
int height);
+// Convert I422 to RGB565 with specified color matrix.
+LIBYUV_API
+int I422ToRGB565Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height);
+
// Convert I420 to AR30 with matrix.
LIBYUV_API
int I420ToAR30Matrix(const uint8_t* src_y,
@@ -1961,6 +2057,36 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
int height,
enum FilterMode filter);
+// Convert I422 to RGB24 with matrix and UV filter mode.
+LIBYUV_API
+int I422ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter);
+
+// Convert I420 to RGB24 with matrix and UV filter mode.
+LIBYUV_API
+int I420ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter);
+
// Convert I010 to AR30 with matrix and UV filter mode.
LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
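
A sketch of how the new MatrixFilter variants are called. kYuvI601Constants and kFilterBilinear are existing libyuv symbols (the latter from scale.h); the wrapper itself is hypothetical:

    #include "libyuv/convert_argb.h"
    #include "libyuv/scale.h" /* enum FilterMode */

    int I422ToRgb24Smooth(const uint8_t* y, int y_stride,
                          const uint8_t* u, int u_stride,
                          const uint8_t* v, int v_stride,
                          uint8_t* rgb24, int rgb24_stride,
                          int width, int height) {
      /* Bilinear-filter the subsampled chroma while converting. */
      return I422ToRGB24MatrixFilter(y, y_stride, u, u_stride, v, v_stride,
                                     rgb24, rgb24_stride, &kYuvI601Constants,
                                     width, height, kFilterBilinear);
    }
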
diff --git a/files/include/libyuv/convert_from_argb.h b/files/include/libyuv/convert_from_argb.h
index 2a488838..ff2a581a 100644
--- a/files/include/libyuv/convert_from_argb.h
+++ b/files/include/libyuv/convert_from_argb.h
@@ -209,10 +209,10 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height);
@@ -222,10 +222,10 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height);
@@ -238,6 +238,41 @@ int ARGBToJ400(const uint8_t* src_argb,
int width,
int height);
+// Convert ABGR to J420. (JPeg full range I420).
+LIBYUV_API
+int ABGRToJ420(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height);
+
+// Convert ABGR to J422.
+LIBYUV_API
+int ABGRToJ422(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height);
+
+// Convert ABGR to J400. (JPeg full range).
+LIBYUV_API
+int ABGRToJ400(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height);
+
// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,
diff --git a/files/include/libyuv/cpu_id.h b/files/include/libyuv/cpu_id.h
index fb90c6c7..203f7e0d 100644
--- a/files/include/libyuv/cpu_id.h
+++ b/files/include/libyuv/cpu_id.h
@@ -55,6 +55,11 @@ static const int kCpuHasLOONGARCH = 0x2000000;
static const int kCpuHasLSX = 0x4000000;
static const int kCpuHasLASX = 0x8000000;
+// These flags are only valid on RISCV processors.
+static const int kCpuHasRISCV = 0x10000000;
+static const int kCpuHasRVV = 0x20000000;
+static const int kCpuHasRVVZVFH = 0x40000000;
+
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
LIBYUV_API
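
The new RVV flags plug into the existing flag machinery; a sketch using TestCpuFlag and MaskCpuFlags, both already part of this header:

    #include "libyuv/cpu_id.h"

    void RunTestsWithoutRvv(void) {
      if (TestCpuFlag(kCpuHasRVV)) {
        MaskCpuFlags(~kCpuHasRVV); /* fall back to C paths for testing */
        /* ... run tests ... */
        MaskCpuFlags(-1);          /* restore auto-detected flags */
      }
    }
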
@@ -78,6 +83,8 @@ LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
+LIBYUV_API
+int RiscvCpuCaps(const char* cpuinfo_name);
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
diff --git a/files/include/libyuv/planar_functions.h b/files/include/libyuv/planar_functions.h
index 1ef2256b..154f2f21 100644
--- a/files/include/libyuv/planar_functions.h
+++ b/files/include/libyuv/planar_functions.h
@@ -85,13 +85,23 @@ void SetPlane(uint8_t* dst_y,
// Convert a plane of tiles of 16 x H to linear.
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height);
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
+
+// Convert a plane of 16 bit tiles of 16 x H to linear.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height);
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
LIBYUV_API
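
Since DetilePlane now returns an int status rather than void, callers can check for bad parameters. A sketch (the tile height of 16 and the wrapper name are illustrative assumptions):

    #include "libyuv/planar_functions.h"

    int DetileLuma(const uint8_t* src, int src_stride,
                   uint8_t* dst, int width, int height) {
      /* Returns 0 on success, -1 on invalid parameters such as tile_height. */
      return DetilePlane(src, src_stride, dst, width, width, height, 16);
    }
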
@@ -105,6 +115,18 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
int height,
int tile_height);
+// Convert a Y and UV plane of tiles into interleaved YUY2.
+LIBYUV_API
+void DetileToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height,
+ int tile_height);
+
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
@@ -370,7 +392,26 @@ int I210Copy(const uint16_t* src_y,
int width,
int height);
+// Copy I410 to I410.
+#define I410ToI410 I410Copy
+LIBYUV_API
+int I410Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -383,6 +424,7 @@ int NV12Copy(const uint8_t* src_y,
int height);
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
diff --git a/files/include/libyuv/rotate.h b/files/include/libyuv/rotate.h
index 684ed5e6..37460c4a 100644
--- a/files/include/libyuv/rotate.h
+++ b/files/include/libyuv/rotate.h
@@ -85,6 +85,60 @@ int I444Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
+// Rotate I010 frame.
+LIBYUV_API
+int I010Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate I210 frame.
+LIBYUV_API
+int I210Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
+// Rotate I410 frame.
+LIBYUV_API
+int I410Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
@@ -156,6 +210,16 @@ void RotatePlane270(const uint8_t* src,
int width,
int height);
+// Rotate a plane by 0, 90, 180, or 270.
+LIBYUV_API
+int RotatePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode);
+
// Rotations for when U and V are interleaved.
// These functions take one UV input pointer and
// split the data into two buffers while
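
A sketch of the new RotatePlane_16 for one 16-bit plane. After a 90-degree rotation the destination is height x width, so its stride is the source height; strides here are assumed to be in uint16 elements:

    #include "libyuv/rotate.h"

    int RotatePlane90_16(const uint16_t* src, uint16_t* dst,
                         int width, int height) {
      return RotatePlane_16(src, width, dst, height, width, height, kRotate90);
    }
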
diff --git a/files/include/libyuv/rotate_row.h b/files/include/libyuv/rotate_row.h
index aa8528a9..2dd8c03d 100644
--- a/files/include/libyuv/rotate_row.h
+++ b/files/include/libyuv/rotate_row.h
@@ -42,6 +42,8 @@ extern "C" {
// The following are available for GCC 32 or 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
#define HAS_TRANSPOSEWX8_SSSE3
+#define HAS_TRANSPOSE4X4_32_SSE2
+#define HAS_TRANSPOSE4X4_32_AVX2
#endif
// The following are available for 64 bit GCC:
@@ -54,6 +56,7 @@ extern "C" {
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
+#define HAS_TRANSPOSE4X4_32_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@@ -215,6 +218,48 @@ void TransposeUVWx16_Any_LSX(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
+void TransposeWxH_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height);
+
+void TransposeWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width);
+void TransposeWx1_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width);
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_AVX2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
+
+void Transpose4x4_32_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width);
#ifdef __cplusplus
} // extern "C"
diff --git a/files/include/libyuv/row.h b/files/include/libyuv/row.h
index 1a1cf4b6..5b244d77 100644
--- a/files/include/libyuv/row.h
+++ b/files/include/libyuv/row.h
@@ -11,7 +11,8 @@
#ifndef INCLUDE_LIBYUV_ROW_H_
#define INCLUDE_LIBYUV_ROW_H_
-#include <stdlib.h> // For malloc.
+#include <stddef.h> // For NULL
+#include <stdlib.h> // For malloc
#include "libyuv/basic_types.h"
@@ -75,9 +76,6 @@ extern "C" {
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#define HAS_ABGRTOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ABGRTOUVROW_SSSE3
-#endif
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBEXTRACTALPHAROW_SSE2
@@ -92,12 +90,6 @@ extern "C" {
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#endif
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3
@@ -111,6 +103,7 @@ extern "C" {
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
+#define HAS_I444TORGB24ROW_SSSE3
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_J400TOARGBROW_SSE2
#define HAS_J422TOARGBROW_SSSE3
@@ -124,16 +117,13 @@ extern "C" {
#define HAS_NV21TORGB24ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTORGB24ROW_SSSE3
+#define HAS_RAWTOYJROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
+#define HAS_RGB24TOYJROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOYROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_RGB24TOYJROW_SSSE3
-#define HAS_RAWTOYJROW_SSSE3
-#define HAS_RGBATOUVROW_SSSE3
-#endif
#define HAS_SETROW_ERMS
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
@@ -145,13 +135,18 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVROW_SSSE3
+#define HAS_ARGBTOUV444ROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
+#define HAS_ARGBTOUVROW_SSSE3
+#define HAS_BGRATOUVROW_SSSE3
+#define HAS_RGBATOUVROW_SSSE3
+#endif
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_SSSE3
-#endif
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
@@ -176,6 +171,9 @@ extern "C" {
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBATTENUATEROW_SSSE3
+#endif
// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works.
@@ -201,17 +199,10 @@ extern "C" {
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
-#define HAS_RAWTOYJROW_AVX2
-#define HAS_RGB24TOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBTOUVJROW_AVX2
-#define HAS_ARGBTOUVROW_AVX2
-#endif
#define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2
-// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
@@ -219,6 +210,7 @@ extern "C" {
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_I444TOARGBROW_AVX2
+#define HAS_I444TORGB24ROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
@@ -228,6 +220,8 @@ extern "C" {
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_NV21TORGB24ROW_AVX2
+#define HAS_RAWTOYJROW_AVX2
+#define HAS_RGB24TOYJROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOARGBROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
@@ -237,16 +231,21 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
+// #define HAS_HALFFLOATROW_F16C // Enable to test half float cast
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBTOUVJROW_AVX2
+#define HAS_ARGBTOUVROW_AVX2
+#endif
// Effects:
#define HAS_ARGBADDROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBATTENUATEROW_AVX2
-#endif
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBATTENUATEROW_AVX2
+#endif
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
@@ -282,28 +281,32 @@ extern "C" {
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#define HAS_AB64TOARGBROW_SSSE3
#define HAS_ABGRTOAR30ROW_SSSE3
+#define HAS_ABGRTOYJROW_SSSE3
+#define HAS_AR64TOARGBROW_SSSE3
+#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3
-#define HAS_ARGBTOAB64ROW_SSSE3
-#define HAS_AR64TOARGBROW_SSSE3
-#define HAS_AB64TOARGBROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
+#define HAS_DETILEROW_16_SSE2
+#define HAS_DETILEROW_16_AVX
#define HAS_DETILESPLITUVROW_SSSE3
+#define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
#define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3
#define HAS_I212TOAR30ROW_SSSE3
#define HAS_I212TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
-#define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3
#define HAS_I410TOARGBROW_SSSE3
+#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGEARGBROW_SSE2
-#define HAS_MERGEXRGBROW_SSE2
#define HAS_MERGERGBROW_SSSE3
+#define HAS_MERGEXRGBROW_SSE2
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV21TOYUV24ROW_SSSE3
#define HAS_P210TOAR30ROW_SSSE3
@@ -312,15 +315,17 @@ extern "C" {
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
-#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RGBATOYJROW_SSSE3
-#endif
#define HAS_SPLITARGBROW_SSE2
#define HAS_SPLITARGBROW_SSSE3
+#define HAS_SPLITRGBROW_SSSE3
#define HAS_SPLITXRGBROW_SSE2
#define HAS_SPLITXRGBROW_SSSE3
-#define HAS_SPLITRGBROW_SSSE3
#define HAS_SWAPUVROW_SSSE3
+#define HAS_YUY2TONVUVROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVJROW_SSSE3
+#endif
#if defined(__x86_64__) || !defined(__pic__)
// TODO(fbarchard): fix build error on android_full_debug=1
@@ -335,31 +340,20 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+#define HAS_AB64TOARGBROW_AVX2
#define HAS_ABGRTOAR30ROW_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ABGRTOUVROW_AVX2
+#define HAS_ABGRTOYJROW_AVX2
#define HAS_ABGRTOYROW_AVX2
-#endif
+#define HAS_AR64TOARGBROW_AVX2
+#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
+#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
-#define HAS_ARGBTOAR64ROW_AVX2
-#define HAS_ARGBTOAB64ROW_AVX2
-#define HAS_AR64TOARGBROW_AVX2
-#define HAS_AB64TOARGBROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
-#define HAS_INTERPOLATEROW_16TO8_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
#define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2
-#define HAS_MERGEAR64ROW_AVX2
-#define HAS_MERGEARGB16TO8ROW_AVX2
-#define HAS_MERGEARGBROW_AVX2
-#define HAS_MERGEXR30ROW_AVX2
-#define HAS_MERGEXR64ROW_AVX2
-#define HAS_MERGEXRGB16TO8ROW_AVX2
-#define HAS_MERGEXRGBROW_AVX2
-#define HAS_NV21TOYUV24ROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I212TOAR30ROW_AVX2
@@ -367,23 +361,35 @@ extern "C" {
#define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2
-#define HAS_P210TOAR30ROW_AVX2
-#define HAS_P210TOARGBROW_AVX2
-#define HAS_P410TOAR30ROW_AVX2
-#define HAS_P410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
+#define HAS_INTERPOLATEROW_16TO8_AVX2
+#define HAS_MERGEAR64ROW_AVX2
+#define HAS_MERGEARGB16TO8ROW_AVX2
+#define HAS_MERGEARGBROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
+#define HAS_MERGEXR30ROW_AVX2
+#define HAS_MERGEXR64ROW_AVX2
+#define HAS_MERGEXRGB16TO8ROW_AVX2
+#define HAS_MERGEXRGBROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_MULTIPLYROW_16_AVX2
-#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_NV21TOYUV24ROW_AVX2
+#define HAS_P210TOAR30ROW_AVX2
+#define HAS_P210TOARGBROW_AVX2
+#define HAS_P410TOAR30ROW_AVX2
+#define HAS_P410TOARGBROW_AVX2
#define HAS_RGBATOYJROW_AVX2
-#endif
#define HAS_SPLITARGBROW_AVX2
-#define HAS_SPLITXRGBROW_AVX2
#define HAS_SPLITUVROW_16_AVX2
+#define HAS_SPLITXRGBROW_AVX2
#define HAS_SWAPUVROW_AVX2
+#define HAS_YUY2TONVUVROW_AVX2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ABGRTOUVJROW_AVX2
+#define HAS_ABGRTOUVROW_AVX2
+#endif
#if defined(__x86_64__) || !defined(__pic__)
// TODO(fbarchard): fix build error on android_full_debug=1
@@ -397,8 +403,9 @@ extern "C" {
// TODO(fbarchard): Port to GCC and Visual C
// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789
#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512))
+ (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
#define HAS_ARGBTORGB24ROW_AVX512VBMI
+#define HAS_MERGEUVROW_AVX512BW
#endif
// The following are available for AVX512 clang x64 platforms:
@@ -412,7 +419,9 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_AB64TOARGBROW_NEON
+#define HAS_ABGRTOUVJROW_NEON
#define HAS_ABGRTOUVROW_NEON
+#define HAS_ABGRTOYJROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_AR64TOARGBROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
@@ -444,8 +453,11 @@ extern "C" {
#define HAS_BYTETOFLOATROW_NEON
#define HAS_CONVERT16TO8ROW_NEON
#define HAS_COPYROW_NEON
+#define HAS_DETILEROW_16_NEON
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
+#define HAS_DETILETOYUY2_NEON
+#define HAS_UNPACKMT2T_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@@ -461,6 +473,7 @@ extern "C" {
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444ALPHATOARGBROW_NEON
#define HAS_I444TOARGBROW_NEON
+#define HAS_I444TORGB24ROW_NEON
#define HAS_INTERPOLATEROW_16_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_J400TOARGBROW_NEON
@@ -513,6 +526,7 @@ extern "C" {
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YUY2TOARGBROW_NEON
+#define HAS_YUY2TONVUVROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
@@ -524,13 +538,13 @@ extern "C" {
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
-#define HAS_RGB24MIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_RGB24MIRRORROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
@@ -540,12 +554,13 @@ extern "C" {
// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_GAUSSCOL_F32_NEON
+#define HAS_GAUSSROW_F32_NEON
#define HAS_INTERPOLATEROW_16TO8_NEON
#define HAS_SCALESUMSAMPLES_NEON
-#define HAS_GAUSSROW_F32_NEON
-#define HAS_GAUSSCOL_F32_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#define HAS_ABGRTOUVJROW_MSA
#define HAS_ABGRTOUVROW_MSA
#define HAS_ABGRTOYROW_MSA
#define HAS_ARGB1555TOARGBROW_MSA
@@ -581,27 +596,25 @@ extern "C" {
#define HAS_BGRATOYROW_MSA
#define HAS_HALFFLOATROW_MSA
#define HAS_I400TOARGBROW_MSA
-#define HAS_I422TOUYVYROW_MSA
-#define HAS_I422TOYUY2ROW_MSA
-#define HAS_I422TOARGBROW_MSA
-#define HAS_I422TORGBAROW_MSA
#define HAS_I422ALPHATOARGBROW_MSA
+#define HAS_I422TOARGB1555ROW_MSA
+#define HAS_I422TOARGB4444ROW_MSA
+#define HAS_I422TOARGBROW_MSA
#define HAS_I422TORGB24ROW_MSA
#define HAS_I422TORGB565ROW_MSA
-#define HAS_I422TOARGB4444ROW_MSA
-#define HAS_I422TOARGB1555ROW_MSA
-#define HAS_NV12TOARGBROW_MSA
-#define HAS_NV12TORGB565ROW_MSA
-#define HAS_NV21TOARGBROW_MSA
-#define HAS_YUY2TOARGBROW_MSA
-#define HAS_UYVYTOARGBROW_MSA
+#define HAS_I422TORGBAROW_MSA
+#define HAS_I422TOUYVYROW_MSA
+#define HAS_I422TOYUY2ROW_MSA
#define HAS_I444TOARGBROW_MSA
#define HAS_INTERPOLATEROW_MSA
#define HAS_J400TOARGBROW_MSA
#define HAS_MERGEUVROW_MSA
#define HAS_MIRRORROW_MSA
-#define HAS_MIRRORUVROW_MSA
#define HAS_MIRRORSPLITUVROW_MSA
+#define HAS_MIRRORUVROW_MSA
+#define HAS_NV12TOARGBROW_MSA
+#define HAS_NV12TORGB565ROW_MSA
+#define HAS_NV21TOARGBROW_MSA
#define HAS_RAWTOARGBROW_MSA
#define HAS_RAWTORGB24ROW_MSA
#define HAS_RAWTOUVROW_MSA
@@ -621,113 +634,208 @@ extern "C" {
#define HAS_SOBELXYROW_MSA
#define HAS_SOBELYROW_MSA
#define HAS_SPLITUVROW_MSA
+#define HAS_UYVYTOARGBROW_MSA
#define HAS_UYVYTOUVROW_MSA
#define HAS_UYVYTOYROW_MSA
+#define HAS_YUY2TOARGBROW_MSA
#define HAS_YUY2TOUV422ROW_MSA
#define HAS_YUY2TOUVROW_MSA
#define HAS_YUY2TOYROW_MSA
#endif
#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
-#define HAS_ARGB4444TOARGBROW_LSX
+#define HAS_ABGRTOUVROW_LSX
+#define HAS_ABGRTOYJROW_LSX
+#define HAS_ABGRTOYROW_LSX
#define HAS_ARGB1555TOARGBROW_LSX
-#define HAS_RGB565TOARGBROW_LSX
-#define HAS_RGB24TOARGBROW_LSX
-#define HAS_RAWTOARGBROW_LSX
-#define HAS_ARGB1555TOYROW_LSX
#define HAS_ARGB1555TOUVROW_LSX
-#define HAS_RGB565TOYROW_LSX
-#define HAS_RGB565TOUVROW_LSX
-#define HAS_RGB24TOYROW_LSX
-#define HAS_RGB24TOUVROW_LSX
-#define HAS_RAWTOYROW_LSX
-#define HAS_RAWTOUVROW_LSX
+#define HAS_ARGB1555TOYROW_LSX
+#define HAS_ARGB4444TOARGBROW_LSX
+#define HAS_ARGBADDROW_LSX
+#define HAS_ARGBATTENUATEROW_LSX
+#define HAS_ARGBBLENDROW_LSX
+#define HAS_ARGBCOLORMATRIXROW_LSX
+#define HAS_ARGBEXTRACTALPHAROW_LSX
+#define HAS_ARGBGRAYROW_LSX
+#define HAS_ARGBMIRRORROW_LSX
+#define HAS_ARGBMULTIPLYROW_LSX
+#define HAS_ARGBQUANTIZEROW_LSX
+#define HAS_ARGBSEPIAROW_LSX
+#define HAS_ARGBSETROW_LSX
+#define HAS_ARGBSHADEROW_LSX
+#define HAS_ARGBSHUFFLEROW_LSX
+#define HAS_ARGBSUBTRACTROW_LSX
+#define HAS_ARGBTOARGB1555ROW_LSX
+#define HAS_ARGBTOARGB4444ROW_LSX
+#define HAS_ARGBTORAWROW_LSX
+#define HAS_ARGBTORGB24ROW_LSX
+#define HAS_ARGBTORGB565DITHERROW_LSX
+#define HAS_ARGBTORGB565ROW_LSX
+#define HAS_ARGBTOUV444ROW_LSX
+#define HAS_ARGBTOUVJROW_LSX
+#define HAS_ARGBTOUVROW_LSX
+#define HAS_ARGBTOYJROW_LSX
+#define HAS_ARGBTOYROW_LSX
+#define HAS_BGRATOUVROW_LSX
+#define HAS_BGRATOYROW_LSX
+#define HAS_I400TOARGBROW_LSX
+#define HAS_I422ALPHATOARGBROW_LSX
+#define HAS_I422TOARGB1555ROW_LSX
+#define HAS_I422TOARGB4444ROW_LSX
+#define HAS_I422TOARGBROW_LSX
+#define HAS_I422TORGB24ROW_LSX
+#define HAS_I422TORGB565ROW_LSX
+#define HAS_I422TORGBAROW_LSX
+#define HAS_I422TOUYVYROW_LSX
+#define HAS_I422TOYUY2ROW_LSX
+#define HAS_I444TOARGBROW_LSX
+#define HAS_INTERPOLATEROW_LSX
+#define HAS_J400TOARGBROW_LSX
+#define HAS_MERGEUVROW_LSX
+#define HAS_MIRRORROW_LSX
+#define HAS_MIRRORSPLITUVROW_LSX
+#define HAS_MIRRORUVROW_LSX
#define HAS_NV12TOARGBROW_LSX
#define HAS_NV12TORGB565ROW_LSX
#define HAS_NV21TOARGBROW_LSX
+#define HAS_RAWTOARGBROW_LSX
+#define HAS_RAWTORGB24ROW_LSX
+#define HAS_RAWTOUVROW_LSX
+#define HAS_RAWTOYJROW_LSX
+#define HAS_RAWTOYROW_LSX
+#define HAS_RGB24TOARGBROW_LSX
+#define HAS_RGB24TOUVROW_LSX
+#define HAS_RGB24TOYJROW_LSX
+#define HAS_RGB24TOYROW_LSX
+#define HAS_RGB565TOARGBROW_LSX
+#define HAS_RGB565TOUVROW_LSX
+#define HAS_RGB565TOYROW_LSX
+#define HAS_RGBATOUVROW_LSX
+#define HAS_RGBATOYJROW_LSX
+#define HAS_RGBATOYROW_LSX
+#define HAS_SETROW_LSX
#define HAS_SOBELROW_LSX
#define HAS_SOBELTOPLANEROW_LSX
#define HAS_SOBELXYROW_LSX
-#define HAS_ARGBTOYJROW_LSX
-#define HAS_BGRATOYROW_LSX
-#define HAS_BGRATOUVROW_LSX
-#define HAS_ABGRTOYROW_LSX
-#define HAS_ABGRTOUVROW_LSX
-#define HAS_RGBATOYROW_LSX
-#define HAS_RGBATOUVROW_LSX
-#define HAS_ARGBTOUVJROW_LSX
-#define HAS_I444TOARGBROW_LSX
-#define HAS_I400TOARGBROW_LSX
-#define HAS_J400TOARGBROW_LSX
-#define HAS_YUY2TOARGBROW_LSX
-#define HAS_UYVYTOARGBROW_LSX
-#define HAS_INTERPOLATEROW_LSX
-#define HAS_ARGBSETROW_LSX
-#define HAS_RAWTORGB24ROW_LSX
-#define HAS_MERGEUVROW_LSX
-#define HAS_ARGBEXTRACTALPHAROW_LSX
-#define HAS_ARGBBLENDROW_LSX
-#define HAS_ARGBQUANTIZEROW_LSX
-#define HAS_ARGBCOLORMATRIXROW_LSX
#define HAS_SPLITUVROW_LSX
-#define HAS_SETROW_LSX
-#define HAS_MIRRORSPLITUVROW_LSX
+#define HAS_UYVYTOARGBROW_LSX
+#define HAS_UYVYTOUV422ROW_LSX
+#define HAS_UYVYTOUVROW_LSX
+#define HAS_UYVYTOYROW_LSX
+#define HAS_YUY2TOARGBROW_LSX
+#define HAS_YUY2TOUV422ROW_LSX
+#define HAS_YUY2TOUVROW_LSX
+#define HAS_YUY2TOYROW_LSX
+#endif
+
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
+#define HAS_ABGRTOYJROW_LASX
+#define HAS_ABGRTOYROW_LASX
+#define HAS_ARGB1555TOARGBROW_LASX
+#define HAS_ARGB1555TOUVROW_LASX
+#define HAS_ARGB1555TOYROW_LASX
+#define HAS_ARGB4444TOARGBROW_LASX
+#define HAS_ARGBADDROW_LASX
+#define HAS_ARGBATTENUATEROW_LASX
+#define HAS_ARGBGRAYROW_LASX
+#define HAS_ARGBMIRRORROW_LASX
+#define HAS_ARGBMULTIPLYROW_LASX
+#define HAS_ARGBSEPIAROW_LASX
+#define HAS_ARGBSHADEROW_LASX
+#define HAS_ARGBSHUFFLEROW_LASX
+#define HAS_ARGBSUBTRACTROW_LASX
+#define HAS_ARGBTOARGB1555ROW_LASX
+#define HAS_ARGBTOARGB4444ROW_LASX
+#define HAS_ARGBTORAWROW_LASX
+#define HAS_ARGBTORGB24ROW_LASX
+#define HAS_ARGBTORGB565DITHERROW_LASX
+#define HAS_ARGBTORGB565ROW_LASX
+#define HAS_ARGBTOUV444ROW_LASX
+#define HAS_ARGBTOUVJROW_LASX
+#define HAS_ARGBTOUVROW_LASX
+#define HAS_ARGBTOYJROW_LASX
+#define HAS_ARGBTOYROW_LASX
+#define HAS_BGRATOYROW_LASX
+#define HAS_I422ALPHATOARGBROW_LASX
+#define HAS_I422TOARGB1555ROW_LASX
+#define HAS_I422TOARGB4444ROW_LASX
#define HAS_I422TOARGBROW_LASX
+#define HAS_I422TORGB24ROW_LASX
+#define HAS_I422TORGB565ROW_LASX
#define HAS_I422TORGBAROW_LASX
-#define HAS_I422ALPHATOARGBROW_LASX
-#define HAS_I422TOYUY2ROW_LASX
#define HAS_I422TOUYVYROW_LASX
+#define HAS_I422TOYUY2ROW_LASX
#define HAS_MIRRORROW_LASX
#define HAS_MIRRORUVROW_LASX
-#define HAS_ARGBMIRRORROW_LASX
-#define HAS_I422TORGB24ROW_LASX
-#define HAS_I422TORGB565ROW_LASX
-#define HAS_I422TOARGB4444ROW_LASX
-#define HAS_I422TOARGB1555ROW_LASX
-#define HAS_YUY2TOUVROW_LASX
-#define HAS_YUY2TOYROW_LASX
-#define HAS_YUY2TOUV422ROW_LASX
-#define HAS_UYVYTOYROW_LASX
-#define HAS_UYVYTOUVROW_LASX
-#define HAS_UYVYTOUV422ROW_LASX
-#define HAS_ARGBTOYROW_LASX
-#define HAS_ARGBTOUVROW_LASX
-#define HAS_ARGBTORGB24ROW_LASX
-#define HAS_ARGBTORAWROW_LASX
-#define HAS_ARGBTORGB565ROW_LASX
-#define HAS_ARGBTOARGB1555ROW_LASX
-#define HAS_ARGBTOARGB4444ROW_LASX
-#define HAS_ARGBTOUV444ROW_LASX
-#define HAS_ARGBMULTIPLYROW_LASX
-#define HAS_ARGBADDROW_LASX
-#define HAS_ARGBSUBTRACTROW_LASX
-#define HAS_ARGBATTENUATEROW_LASX
-#define HAS_ARGBTORGB565DITHERROW_LASX
-#define HAS_ARGBSHUFFLEROW_LASX
-#define HAS_ARGBSHADEROW_LASX
-#define HAS_ARGBGRAYROW_LASX
-#define HAS_ARGBSEPIAROW_LASX
-#define HAS_ARGB4444TOARGBROW_LASX
-#define HAS_ARGB1555TOARGBROW_LASX
-#define HAS_RGB565TOARGBROW_LASX
-#define HAS_RGB24TOARGBROW_LASX
-#define HAS_RAWTOARGBROW_LASX
-#define HAS_ARGB1555TOYROW_LASX
-#define HAS_ARGB1555TOUVROW_LASX
-#define HAS_RGB565TOYROW_LASX
-#define HAS_RGB565TOUVROW_LASX
-#define HAS_RGB24TOYROW_LASX
-#define HAS_RGB24TOUVROW_LASX
-#define HAS_RAWTOYROW_LASX
-#define HAS_RAWTOUVROW_LASX
#define HAS_NV12TOARGBROW_LASX
#define HAS_NV12TORGB565ROW_LASX
#define HAS_NV21TOARGBROW_LASX
-#define HAS_ARGBTOYJROW_LASX
-#define HAS_ARGBTOUVJROW_LASX
+#define HAS_RAWTOARGBROW_LASX
+#define HAS_RAWTOUVROW_LASX
+#define HAS_RAWTOYJROW_LASX
+#define HAS_RAWTOYROW_LASX
+#define HAS_RGB24TOARGBROW_LASX
+#define HAS_RGB24TOUVROW_LASX
+#define HAS_RGB24TOYJROW_LASX
+#define HAS_RGB24TOYROW_LASX
+#define HAS_RGB565TOARGBROW_LASX
+#define HAS_RGB565TOUVROW_LASX
+#define HAS_RGB565TOYROW_LASX
+#define HAS_RGBATOYJROW_LASX
+#define HAS_RGBATOYROW_LASX
+#define HAS_UYVYTOUV422ROW_LASX
+#define HAS_UYVYTOUVROW_LASX
+#define HAS_UYVYTOYROW_LASX
+#define HAS_YUY2TOUV422ROW_LASX
+#define HAS_YUY2TOUVROW_LASX
+#define HAS_YUY2TOYROW_LASX
+#endif
+
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#define HAS_AB64TOARGBROW_RVV
+#define HAS_ABGRTOYJROW_RVV
+#define HAS_ABGRTOYROW_RVV
+#define HAS_AR64TOARGBROW_RVV
+#define HAS_ARGBATTENUATEROW_RVV
+#define HAS_ARGBTOAB64ROW_RVV
+#define HAS_ARGBTOAR64ROW_RVV
+#define HAS_ARGBTORAWROW_RVV
+#define HAS_ARGBTORGB24ROW_RVV
+#define HAS_ARGBTOYJROW_RVV
+#define HAS_ARGBTOYROW_RVV
+#define HAS_BGRATOYROW_RVV
+#define HAS_COPYROW_RVV
+#define HAS_I400TOARGBROW_RVV
+#define HAS_I422ALPHATOARGBROW_RVV
+#define HAS_I422TOARGBROW_RVV
+#define HAS_I422TORGB24ROW_RVV
+#define HAS_I422TORGBAROW_RVV
+#define HAS_I444ALPHATOARGBROW_RVV
+#define HAS_I444TOARGBROW_RVV
+#define HAS_I444TORGB24ROW_RVV
+#define HAS_INTERPOLATEROW_RVV
+#define HAS_J400TOARGBROW_RVV
+#define HAS_MERGEARGBROW_RVV
+#define HAS_MERGERGBROW_RVV
+#define HAS_MERGEUVROW_RVV
+#define HAS_MERGEXRGBROW_RVV
+#define HAS_RAWTOARGBROW_RVV
+#define HAS_RAWTORGB24ROW_RVV
+#define HAS_RAWTORGBAROW_RVV
+#define HAS_RAWTOYJROW_RVV
+#define HAS_RAWTOYROW_RVV
+#define HAS_RGB24TOARGBROW_RVV
+#define HAS_RGB24TOYJROW_RVV
+#define HAS_RGB24TOYROW_RVV
+#define HAS_RGBATOYJROW_RVV
+#define HAS_RGBATOYROW_RVV
+#define HAS_SPLITARGBROW_RVV
+#define HAS_SPLITRGBROW_RVV
+#define HAS_SPLITUVROW_RVV
+#define HAS_SPLITXRGBROW_RVV
#endif
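The new HAS_*_RVV block is gated on __riscv_vector, which toolchains define when the RISC-V V extension is enabled, so the kernels in files/source/row_rvv.cc only build into vector-enabled targets. An illustrative (not prescriptive) invocation:

clang --target=riscv64-linux-gnu -march=rv64gcv -O2 -c files/source/row_rvv.cc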
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@@ -789,8 +897,8 @@ typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif
-#if defined(__aarch64__) || defined(__arm__)
-// This struct is for ARM color conversion.
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
+// This struct is for ARM and RISC-V color conversion.
struct YuvConstants {
uvec8 kUVCoeff;
vec16 kRGBCoeffBias;
@@ -816,13 +924,21 @@ struct YuvConstants {
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
-#define align_buffer_64(var, size) \
- uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \
- uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
+#define align_buffer_64(var, size) \
+ void* var##_mem = malloc((size) + 63); /* NOLINT */ \
+ uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
#define free_aligned_buffer_64(var) \
free(var##_mem); \
- var = 0
+ var = NULL
+
+#define align_buffer_64_16(var, size) \
+ void* var##_mem = malloc((size)*2 + 63); /* NOLINT */ \
+ uint16_t* var = (uint16_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */
+
+#define free_aligned_buffer_64_16(var) \
+ free(var##_mem); \
+ var = NULL
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
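The reworked align_buffer_64 macros keep the two-name pattern: var##_mem holds the raw malloc result and var the 64-byte-aligned view, now set to NULL on free instead of 0. For the new _16 variants the size argument counts uint16_t elements (hence the *2). A minimal sketch of the pairing:

// The macros expand to declarations, so they go where declarations are legal.
void ExampleScratchRows(int width) {
  align_buffer_64(row, width);       // uint8_t* row, 64-byte aligned
  align_buffer_64_16(row16, width);  // uint16_t* row16, size in elements
  // ... use row[0..width-1] and row16[0..width-1] ...
  free_aligned_buffer_64(row);       // frees row_mem, sets row to NULL
  free_aligned_buffer_64_16(row16);
}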
@@ -894,6 +1010,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -981,6 +1103,50 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I444ToARGBRow_MSA(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1000,6 +1166,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1012,6 +1184,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1025,6 +1203,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1038,6 +1223,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1050,6 +1241,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1062,6 +1259,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1074,6 +1277,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1148,9 +1357,13 @@ void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
-void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void ABGRToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@@ -1164,13 +1377,23 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
-void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
-void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width);
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
+void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width);
+void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
+void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1189,11 +1412,20 @@ void ARGBToUVRow_MSA(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUVRow_LSX(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_LASX(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUV444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUV444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1203,6 +1435,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width);
void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -1258,6 +1495,11 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_MSA(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1372,6 +1614,13 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_y,
int width);
+void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width);
+void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
@@ -1384,6 +1633,8 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555,
uint8_t* dst_y,
int width);
@@ -1393,9 +1644,15 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
+void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
+void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
+void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
+void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
+void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
@@ -1409,6 +1666,7 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1423,6 +1681,7 @@ void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1453,10 +1712,15 @@ void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1465,7 +1729,14 @@ void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1485,6 +1756,11 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1495,6 +1771,11 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -1525,6 +1806,11 @@ void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_AVX2(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1535,6 +1821,11 @@ void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1568,11 +1859,20 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ARGBToUV444Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1582,6 +1882,11 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr,
+ int src_stride,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@@ -1747,16 +2052,16 @@ void ARGBToUVJRow_C(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void ABGRToUVJRow_C(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void ARGBToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
-void ARGBToUVJRow_C(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width);
void BGRAToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1772,6 +2077,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void RGBAToUVJRow_C(const uint8_t* src_rgb,
+ int src_stride_rgb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void RGB24ToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@@ -1826,6 +2136,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1833,17 +2144,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width);
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorSplitUVRow_SSSE3(const uint8_t* src,
@@ -1867,10 +2181,13 @@ void MirrorSplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_v,
int width);
+void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width);
+
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
@@ -1883,6 +2200,7 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -1925,6 +2243,10 @@ void SplitUVRow_LSX(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void SplitUVRow_RVV(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -1949,7 +2271,6 @@ void DetileRow_C(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
-
void DetileRow_NEON(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
@@ -1966,6 +2287,42 @@ void DetileRow_Any_SSE2(const uint8_t* src,
ptrdiff_t src_tile_stride,
uint8_t* dst,
int width);
+void DetileRow_AVX(const uint8_t* src,
+ ptrdiff_t src_tile_stride,
+ uint8_t* dst,
+ int width);
+void DetileRow_Any_AVX(const uint8_t* src,
+ ptrdiff_t src_tile_stride,
+ uint8_t* dst,
+ int width);
+void DetileRow_16_C(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
+void DetileRow_16_Any_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width);
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
@@ -1991,6 +2348,38 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void DetileToYUY2_C(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_Any_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void DetileToYUY2_Any_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width);
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
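The detile helpers walk 16-byte-wide tiles: each output row takes 16 consecutive bytes from a tile, then jumps by the tile stride to the same row of the next tile, and DetileToYUY2 interleaves the detiled Y and UV rows as Y0 U Y1 V on the way out. A scalar sketch of the row walk (assuming the 16 byte tile width used by the C/NEON/SSE2 kernels; illustrative, not the shipped code):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void DetileRowSketch(const uint8_t* src, ptrdiff_t src_tile_stride,
                            uint8_t* dst, int width) {
  for (int x = 0; x < width; x += 16) {
    memcpy(dst + x, src, 16);  // 16 bytes from the current tile
    src += src_tile_stride;    // hop to the same row of the next tile
  }
}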
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@@ -2003,6 +2392,10 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width);
+void MergeUVRow_AVX512BW(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
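MergeUVRow produces the NV12-style interleaved UV plane from separate half-width U and V rows; the AVX512BW variant added here only widens the vector. The scalar shape of the operation (a sketch in the spirit of MergeUVRow_C):

static void MergeUVRowSketch(const uint8_t* src_u, const uint8_t* src_v,
                             uint8_t* dst_uv, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = src_u[x];  // dst_uv holds u0 v0 u1 v1 ...
    dst_uv[2 * x + 1] = src_v[x];
  }
}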
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@@ -2015,6 +2408,10 @@ void MergeUVRow_LSX(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width);
+void MergeUVRow_RVV(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width);
void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -2023,6 +2420,10 @@ void MergeUVRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void MergeUVRow_Any_AVX512BW(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void MergeUVRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -2079,6 +2480,11 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
+void SplitRGBRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2105,6 +2511,11 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_rgb,
int width);
+void MergeRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width);
void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2139,6 +2550,12 @@ void MergeARGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_a,
uint8_t* dst_argb,
int width);
+void MergeARGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ int width);
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2187,6 +2604,12 @@ void SplitARGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_b,
uint8_t* dst_a,
int width);
+void SplitARGBRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ uint8_t* dst_a,
+ int width);
void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2231,6 +2654,11 @@ void MergeXRGBRow_NEON(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_argb,
int width);
+void MergeXRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_argb,
+ int width);
void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -2271,6 +2699,11 @@ void SplitXRGBRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_g,
uint8_t* dst_b,
int width);
+void SplitXRGBRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width);
void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2604,8 +3037,8 @@ void Convert16To8Row_NEON(const uint16_t* src_y,
uint8_t* dst_y,
int scale,
int width);
-void Convert16To8Row_Any_NEON(const uint16_t* src_y,
- uint8_t* dst_y,
+void Convert16To8Row_Any_NEON(const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
int scale,
int width);
@@ -2614,6 +3047,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int count);
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -2713,6 +3147,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
const uint8_t* shuffler,
int width);
+void ARGBShuffleRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width);
void ARGBShuffleRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
const uint8_t* shuffler,
@@ -2733,6 +3171,10 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint8_t* param,
int width);
+void ARGBShuffleRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint8_t* param,
+ int width);
void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint8_t* param,
@@ -2765,14 +3207,18 @@ void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width);
+void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width);
@@ -2932,15 +3378,15 @@ void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
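The const qualifiers removed above sat on by-value parameters, where they constrain only the definition, so dropping them from the declarations changes nothing for callers. The dither4 argument packs four 8 bit dither offsets applied cyclically, one per pixel, before truncation to 5/6/5 bits; roughly (a sketch of the idea, assuming libyuv's B,G,R,A byte order, not the shipped kernel):

#include <stdint.h>

static inline uint8_t Clamp255(int v) { return v > 255 ? 255 : (uint8_t)v; }

static uint16_t DitherPixelSketch(const uint8_t* argb, uint32_t dither4, int x) {
  int d = (dither4 >> ((x & 3) * 8)) & 0xff;    // offset for this pixel
  uint16_t b = Clamp255(argb[0] + d) >> 3;
  uint16_t g = Clamp255(argb[1] + d) >> 2;
  uint16_t r = Clamp255(argb[2] + d) >> 3;
  return (uint16_t)(b | (g << 5) | (r << 11));  // RGB565 bit layout
}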
@@ -2968,7 +3414,7 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
int width);
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -2981,23 +3427,39 @@ void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
int width);
void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
+ int width);
+void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
int width);
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width);
+void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
void ARGBToARGB1555Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
+void ARGBToARGB4444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width);
void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
int width);
+void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
+
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -3035,6 +3497,10 @@ void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
+void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
+void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
+void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
@@ -3077,6 +3543,7 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
@@ -3096,6 +3563,12 @@ void I444ToARGBRow_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -3290,6 +3763,18 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
+void I444ToRGB24Row_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -3631,12 +4116,24 @@ void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -3823,13 +4320,13 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
-void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_yuv24,
+void NV21ToYUV24Row_Any_SSSE3(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
-void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_yuv24,
+void NV21ToYUV24Row_Any_AVX2(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,
@@ -3976,6 +4473,10 @@ void I400ToARGBRow_LSX(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I400ToARGBRow_RVV(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* param,
@@ -4084,10 +4585,18 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBMultiplyRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBMultiplyRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBMultiplyRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4130,10 +4639,18 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBAddRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBAddRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBAddRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBAddRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4177,10 +4694,18 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
+void ARGBSubtractRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
void ARGBSubtractRow_LASX(const uint8_t* src_argb0,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
+void ARGBSubtractRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* uv_buf,
+ uint8_t* dst_ptr,
+ int width);
void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -4273,21 +4798,37 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
+void ARGBToRGB565DitherRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ const uint32_t param,
+ int width);
void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const uint32_t param,
int width);
-
+void ARGBToRGB24Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToRAWRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToRGB565Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToARGB1555Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBToARGB4444Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4298,6 +4839,12 @@ void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I444ToRGB24Row_Any_NEON(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4443,6 +4990,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4455,6 +5008,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4468,6 +5027,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4481,6 +5047,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4493,6 +5065,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4505,6 +5083,12 @@ void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4517,6 +5101,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4592,6 +5182,10 @@ void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4602,6 +5196,10 @@ void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4612,17 +5210,27 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_LASX(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
@@ -4632,6 +5240,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4642,6 +5254,10 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
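The new YUY2ToNVUVRow family emits interleaved UV directly (NV12 chroma order) rather than split U and V planes, averaging the chroma of two YUY2 rows. A minimal C sketch of what such a kernel computes, assuming the usual {Y0, U, Y1, V} byte order and a rounded two-row average (an illustration, not necessarily the library's exact implementation):

  #include <stdint.h>

  // Sketch: average the chroma of two YUY2 rows into interleaved UV.
  // Assumes {Y0, U, Y1, V} per pixel pair and an even width in pixels.
  static void YUY2ToNVUVRow_Sketch(const uint8_t* src_yuy2,
                                   int stride_yuy2,  // bytes to the next row
                                   uint8_t* dst_uv,
                                   int width) {
    const uint8_t* next = src_yuy2 + stride_yuy2;
    int x;
    for (x = 0; x < width; x += 2) {
      dst_uv[0] = (uint8_t)((src_yuy2[1] + next[1] + 1) >> 1);  // rounded U
      dst_uv[1] = (uint8_t)((src_yuy2[3] + next[3] + 1) >> 1);  // rounded V
      src_yuy2 += 4;
      next += 4;
      dst_uv += 2;
    }
  }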
@@ -4652,6 +5268,10 @@ void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4662,6 +5282,10 @@ void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4672,17 +5296,27 @@ void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToNVUVRow_Any_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width);
void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@@ -4692,6 +5326,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4737,12 +5375,18 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
uint8_t* dst_v,
int width);
void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
+void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUVRow_LSX(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUVRow_LASX(const uint8_t* src_uyvy,
int src_stride_uyvy,
uint8_t* dst_u,
@@ -4752,6 +5396,10 @@ void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUV422Row_LSX(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4798,12 +5446,18 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v,
int width);
void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void UYVYToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@@ -4813,6 +5467,10 @@ void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UYVYToUV422Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4927,6 +5585,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_yuy2,
int width);
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
void I422ToYUY2Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -4937,6 +5600,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_uyvy,
int width);
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
void I422ToUYVYRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -4947,6 +5615,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4957,6 +5630,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4977,9 +5655,15 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
+void ARGBAttenuateRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width);
+void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width);
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4992,6 +5676,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
+void ARGBAttenuateRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -5018,12 +5705,14 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
+void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width);
void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width);
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
@@ -5103,6 +5792,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
uint32_t value);
+void ARGBShadeRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value);
void ARGBShadeRow_LASX(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
@@ -5175,6 +5868,11 @@ void InterpolateRow_LSX(uint8_t* dst_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
+void InterpolateRow_RVV(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int width,
+ int source_y_fraction);
void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
@@ -5526,6 +6224,17 @@ void GaussCol_F32_C(const float* src0,
float* dst,
int width);
+void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
+void GaussCol_C(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width);
+
+void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width);
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
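GaussRow_C and GaussCol_C above are the scalar halves of a separable 5-tap Gaussian: the column pass widens uint16 rows into uint32 sums, and the row pass narrows back to uint16. A sketch assuming the classic binomial 1-4-6-4-1 kernel (sum 16 per pass, hence a >> 8 after the two passes); the exact weights and rounding are assumptions, not lifted from the source:

  #include <stdint.h>

  // Column pass: weighted sum of five source rows into 32-bit accumulators.
  static void GaussCol_Sketch(const uint16_t* s0, const uint16_t* s1,
                              const uint16_t* s2, const uint16_t* s3,
                              const uint16_t* s4, uint32_t* dst, int width) {
    int x;
    for (x = 0; x < width; ++x) {
      dst[x] = s0[x] + 4 * s1[x] + 6 * s2[x] + 4 * s3[x] + s4[x];
    }
  }

  // Row pass: same kernel horizontally; src needs width + 4 valid elements.
  static void GaussRow_Sketch(const uint32_t* src, uint16_t* dst, int width) {
    int x;
    for (x = 0; x < width; ++x) {
      dst[x] = (uint16_t)((src[x] + 4 * src[x + 1] + 6 * src[x + 2] +
                           4 * src[x + 3] + src[x + 4]) >> 8);
    }
  }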
diff --git a/files/include/libyuv/scale_row.h b/files/include/libyuv/scale_row.h
index 6cb5e128..a7957c3f 100644
--- a/files/include/libyuv/scale_row.h
+++ b/files/include/libyuv/scale_row.h
@@ -133,6 +133,8 @@ extern "C" {
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEUVROWDOWN2_NEON
+#define HAS_SCALEUVROWDOWN2LINEAR_NEON
#define HAS_SCALEUVROWDOWN2BOX_NEON
#define HAS_SCALEUVROWDOWNEVEN_NEON
#define HAS_SCALEROWUP2_LINEAR_NEON
@@ -214,6 +216,17 @@ void ScalePlaneVertical_16To8(int src_height,
int scale,
enum FilterMode filtering);
+void ScalePlaneDown2_16To8(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ enum FilterMode filtering);
+
// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
int src_height,
@@ -259,6 +272,16 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -267,6 +290,16 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -279,6 +312,16 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
+void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale);
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
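These new 16To8 variants fold the bit-depth narrowing into the 2x downscale, so high-bit-depth planes can go straight to 8-bit output in one pass. A sketch of the point-sampled variant, assuming the 16.16 fixed-point convention where scale = 1 << (24 - depth) (so a full-range 10-bit 1023 maps to 255); the Box and Odd variants would average neighbors before the same narrowing step:

  #include <stddef.h>
  #include <stdint.h>

  static uint8_t Clamp255(int v) { return (uint8_t)(v > 255 ? 255 : v); }

  // Sketch: 2x point-sampled downscale with 16-to-8-bit conversion.
  // 'scale' is assumed to be a 16.16 fixed-point factor.
  static void ScaleRowDown2_16To8_Sketch(const uint16_t* src_ptr,
                                         ptrdiff_t src_stride,  // unused here
                                         uint8_t* dst, int dst_width,
                                         int scale) {
    int x;
    (void)src_stride;
    for (x = 0; x < dst_width; ++x) {
      dst[x] = Clamp255((src_ptr[1] * scale) >> 16);  // keep odd source pixel
      src_ptr += 2;
    }
  }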
diff --git a/files/include/libyuv/version.h b/files/include/libyuv/version.h
index a85be048..b6623dbb 100644
--- a/files/include/libyuv/version.h
+++ b/files/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1837
+#define LIBYUV_VERSION 1871
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/files/infra/config/PRESUBMIT.py b/files/infra/config/PRESUBMIT.py
index 01ec0eed..f79e08ad 100644
--- a/files/infra/config/PRESUBMIT.py
+++ b/files/infra/config/PRESUBMIT.py
@@ -2,6 +2,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+USE_PYTHON3 = True
+
def CheckChangeOnUpload(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
diff --git a/files/infra/config/cr-buildbucket.cfg b/files/infra/config/cr-buildbucket.cfg
index 061cf33b..be9d1d28 100644
--- a/files/infra/config/cr-buildbucket.cfg
+++ b/files/infra/config/cr-buildbucket.cfg
@@ -34,6 +34,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -65,6 +69,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -96,6 +104,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -111,7 +123,7 @@ buckets {
name: "Android Tester ARM32 Debug (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -124,6 +136,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -139,7 +155,7 @@ buckets {
name: "Android Tester ARM32 Release (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -152,6 +168,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -167,7 +187,7 @@ buckets {
name: "Android Tester ARM64 Debug (Nexus 5X)"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -180,6 +200,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -211,6 +235,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -242,6 +270,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -273,6 +305,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -304,6 +340,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -335,6 +375,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -366,6 +410,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -397,6 +445,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -428,6 +480,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -459,6 +515,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -490,6 +550,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -521,6 +585,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -537,7 +605,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -550,6 +618,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -566,7 +638,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -579,6 +651,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -595,7 +671,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -608,6 +684,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -639,6 +719,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -670,6 +754,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -701,6 +789,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -732,6 +824,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -763,6 +859,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -794,6 +894,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -825,6 +929,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -856,6 +964,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -872,7 +984,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -885,6 +997,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -901,7 +1017,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.ci"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -914,6 +1030,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-trusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "client.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -985,7 +1105,7 @@ buckets {
name: "android"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -998,6 +1118,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1013,7 +1137,7 @@ buckets {
name: "android_arm64"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1026,6 +1150,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1041,7 +1169,7 @@ buckets {
name: "android_rel"
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
- dimensions: "device_type:bullhead"
+ dimensions: "device_type:walleye"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1054,6 +1182,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1085,6 +1217,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1116,6 +1252,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1132,7 +1272,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1145,6 +1285,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1161,7 +1305,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1174,6 +1318,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1205,6 +1353,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1236,6 +1388,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1267,6 +1423,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1298,6 +1458,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1329,6 +1493,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1360,6 +1528,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1391,6 +1563,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1422,6 +1598,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1438,7 +1618,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1451,6 +1631,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1467,7 +1651,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1480,6 +1664,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1496,7 +1684,7 @@ buckets {
swarming_host: "chromium-swarm.appspot.com"
swarming_tags: "vpython:native-python-wrapper"
dimensions: "cpu:x86-64"
- dimensions: "os:Mac-10.15"
+ dimensions: "os:Mac-12"
dimensions: "pool:luci.flex.try"
exe {
cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build"
@@ -1509,6 +1697,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1540,6 +1732,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "run_presubmit",'
' "repo_name": "libyuv",'
@@ -1573,6 +1769,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1604,6 +1804,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1635,6 +1839,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1666,6 +1874,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1697,6 +1909,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
@@ -1728,6 +1944,10 @@ buckets {
' "server_host": "goma.chromium.org",'
' "use_luci_auth": true'
' },'
+ ' "$build/reclient": {'
+ ' "instance": "rbe-webrtc-untrusted",'
+ ' "metrics_project": "chromium-reclient-metrics"'
+ ' },'
' "builder_group": "tryserver.libyuv",'
' "recipe": "libyuv/libyuv"'
'}'
diff --git a/files/infra/config/main.star b/files/infra/config/main.star
index b922ca02..7490a599 100755
--- a/files/infra/config/main.star
+++ b/files/infra/config/main.star
@@ -26,6 +26,16 @@ GOMA_BACKEND_RBE_NO_ATS_PROD = {
"enable_ats": False,
}
+RECLIENT_CI = {
+ "instance": "rbe-webrtc-trusted",
+ "metrics_project": "chromium-reclient-metrics",
+}
+
+RECLIENT_CQ = {
+ "instance": "rbe-webrtc-untrusted",
+ "metrics_project": "chromium-reclient-metrics",
+}
+
# Use LUCI Scheduler BBv2 names and add Scheduler realms configs.
lucicfg.enable_experiment("crbug.com/1182002")
@@ -70,6 +80,10 @@ luci.project(
],
bindings = [
luci.binding(
+ roles = "role/swarming.taskTriggerer", # for LED tasks.
+ groups = "project-libyuv-admins",
+ ),
+ luci.binding(
roles = "role/configs.validator",
users = "libyuv-try-builder@chops-service-accounts.iam.gserviceaccount.com",
),
@@ -195,9 +209,9 @@ luci.bucket(
def get_os_dimensions(os):
if os == "android":
- return {"device_type": "bullhead"}
+ return {"device_type": "walleye"}
if os == "ios" or os == "mac":
- return {"os": "Mac-10.15", "cpu": "x86-64"}
+ return {"os": "Mac-12", "cpu": "x86-64"}
elif os == "win":
return {"os": "Windows-10", "cores": "8", "cpu": "x86-64"}
elif os == "linux":
@@ -255,6 +269,7 @@ def libyuv_try_builder(name, dimensions, properties, recipe_name = "libyuv/libyu
def ci_builder(name, os, category, short_name = None):
dimensions = get_os_dimensions(os)
properties = get_os_properties(os)
+ properties["$build/reclient"] = RECLIENT_CI
dimensions["pool"] = "luci.flex.ci"
properties["builder_group"] = "client.libyuv"
@@ -266,6 +281,7 @@ def ci_builder(name, os, category, short_name = None):
def try_builder(name, os, experiment_percentage = None):
dimensions = get_os_dimensions(os)
properties = get_os_properties(os, try_builder = True)
+ properties["$build/reclient"] = RECLIENT_CQ
dimensions["pool"] = "luci.flex.try"
properties["builder_group"] = "tryserver.libyuv"
diff --git a/files/infra/config/project.cfg b/files/infra/config/project.cfg
index 700226ad..af79cfb2 100644
--- a/files/infra/config/project.cfg
+++ b/files/infra/config/project.cfg
@@ -7,7 +7,7 @@
name: "libyuv"
access: "group:all"
lucicfg {
- version: "1.30.9"
+ version: "1.39.8"
package_dir: "."
config_dir: "."
entry_point: "main.star"
diff --git a/files/infra/config/realms.cfg b/files/infra/config/realms.cfg
index ae04529e..16ffaac9 100644
--- a/files/infra/config/realms.cfg
+++ b/files/infra/config/realms.cfg
@@ -38,6 +38,10 @@ realms {
role: "role/scheduler.reader"
principals: "group:all"
}
+ bindings {
+ role: "role/swarming.taskTriggerer"
+ principals: "group:project-libyuv-admins"
+ }
}
realms {
name: "ci"
diff --git a/files/libyuv.gni b/files/libyuv.gni
index 8df40ba2..0a6c4453 100644
--- a/files/libyuv.gni
+++ b/files/libyuv.gni
@@ -6,13 +6,14 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
-import("//build_overrides/build.gni")
import("//build/config/arm.gni")
import("//build/config/mips.gni")
+import("//build_overrides/build.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false
+ libyuv_disable_rvv = false
libyuv_use_neon =
current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
diff --git a/files/riscv_script/prepare_toolchain_qemu.sh b/files/riscv_script/prepare_toolchain_qemu.sh
new file mode 100755
index 00000000..2a901739
--- /dev/null
+++ b/files/riscv_script/prepare_toolchain_qemu.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+set -ev
+
+# Download & build RISC-V Clang toolchain & QEMU emulator.
+# RISC-V Clang is used to cross-compile for the RISC-V Vector ISA.
+# RISC-V QEMU is used to run the test suite.
+#
+# Requirements: Linux host w/ working C++ compiler, git, cmake, ninja, wget, tar
+
+# NOTE: this script must be run from the top-level libyuv source directory.
+
+RISCV_TRIPLE="riscv64-unknown-linux-gnu"
+RISCV_QEMU="qemu-riscv64"
+
+LIBYUV_SRC_DIR=$(pwd)
+BUILD_DIR="$LIBYUV_SRC_DIR"/build-toolchain-qemu
+INSTALL_QEMU="$BUILD_DIR"/riscv-qemu
+INSTALL_CLANG="$BUILD_DIR"/riscv-clang
+
+LLVM_VERSION="16.0.0"
+LLVM_NAME=llvm-project-"$LLVM_VERSION".src
+
+RISCV_GNU_TOOLCHAIN="$BUILD_DIR"/riscv-gnu-toolchain
+RISCV_CLANG_TOOLCHAIN="$BUILD_DIR"/"$LLVM_NAME"
+
+QEMU_NAME="qemu-7.0.0"
+
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+# Download and install RISC-V GNU Toolchain (needed to build Clang)
+if [ ! -d "$RISCV_GNU_TOOLCHAIN" ]
+then
+ git clone git@github.com:riscv/riscv-gnu-toolchain.git
+ pushd "$RISCV_GNU_TOOLCHAIN"
+ git submodule update --init --recursive
+ ./configure --with-cmodel=medany --prefix="$INSTALL_CLANG"
+ ionice nice make linux -j `nproc` install
+ popd
+fi
+
+# Download the LLVM sources & build the Clang cross compiler
+if [ ! -d "$RISCV_CLANG_TOOLCHAIN" ]
+then
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-"$LLVM_VERSION"/"$LLVM_NAME".tar.xz
+ tar xvJf "$LLVM_NAME".tar.xz
+ pushd "$RISCV_CLANG_TOOLCHAIN"
+ cmake -DCMAKE_INSTALL_PREFIX="$INSTALL_CLANG" \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_TARGETS_TO_BUILD="RISCV" \
+ -DLLVM_ENABLE_PROJECTS="clang" \
+ -DLLVM_DEFAULT_TARGET_TRIPLE="$RISCV_TRIPLE" \
+ -DLLVM_INSTALL_TOOLCHAIN_ONLY=On \
+ -DDEFAULT_SYSROOT=../sysroot \
+ -G "Ninja" "$RISCV_CLANG_TOOLCHAIN"/llvm
+ ionice nice ninja -j `nproc`
+ ionice nice ninja -j `nproc` install
+ popd
+ pushd "$INSTALL_CLANG"/bin
+ ln -sf clang "$RISCV_TRIPLE"-clang
+ ln -sf clang++ "$RISCV_TRIPLE"-clang++
+ popd
+fi
+
+# Download QEMU and build the riscv64 Linux usermode emulator
+if [ ! -d "$QEMU_NAME" ]
+then
+ wget https://download.qemu.org/"$QEMU_NAME".tar.xz
+ tar xvJf "$QEMU_NAME".tar.xz
+ pushd "$QEMU_NAME"
+ ./configure --target-list=riscv64-linux-user --prefix="$INSTALL_QEMU"
+ ionice nice make -j `nproc` install
+ popd
+fi
diff --git a/files/riscv_script/riscv-clang.cmake b/files/riscv_script/riscv-clang.cmake
new file mode 100644
index 00000000..47dd5067
--- /dev/null
+++ b/files/riscv_script/riscv-clang.cmake
@@ -0,0 +1,52 @@
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_SYSTEM_NAME "Linux")
+set(CMAKE_SYSTEM_PROCESSOR "riscv64")
+
+option(USE_RVV "Enable the RISC-V Vector extension." ON)
+option(USE_AUTO_VECTORIZER "Enable the RISC-V auto-vectorizer." OFF)
+
+# Avoid using host system paths when cross-compiling.
+set(CMAKE_FIND_USE_CMAKE_SYSTEM_PATH FALSE)
+
+set(TOOLCHAIN_PATH "" CACHE STRING "The toolchain path.")
+if(NOT TOOLCHAIN_PATH)
+ set(TOOLCHAIN_PATH ${CMAKE_SOURCE_DIR}/build-toolchain-qemu/riscv-clang)
+endif()
+
+set(TOOLCHAIN_PREFIX "riscv64-unknown-linux-gnu-" CACHE STRING "The toolchain prefix.")
+
+# toolchain setting
+set(CMAKE_C_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang")
+set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang++")
+
+# CMake falls back to host-side binaries for the following tools, so set them explicitly here.
+set(CMAKE_C_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar")
+set(CMAKE_CXX_COMPILER_AR "${TOOLCHAIN_PATH}/bin/llvm-ar")
+set(CMAKE_C_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib")
+set(CMAKE_CXX_COMPILER_RANLIB "${TOOLCHAIN_PATH}/bin/llvm-ranlib")
+set(CMAKE_OBJDUMP "${TOOLCHAIN_PATH}/bin/llvm-objdump")
+set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy")
+
+# compile options
+message(STATUS "USE_RVV: ${USE_RVV}")
+message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
+set(RISCV_COMPILER_FLAGS)
+if(USE_RVV)
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
+ if(NOT USE_AUTO_VECTORIZER)
+ # Disable auto-vectorizer
+ add_compile_options(-fno-vectorize -fno-slp-vectorize)
+ endif()
+else()
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
+endif()
+message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}")
+
+set(CMAKE_C_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${RISCV_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}")
+
+set(RISCV_LINKER_FLAGS "-lstdc++ -lpthread -lm -ldl")
+set(RISCV_LINKER_FLAGS_EXE)
+set(CMAKE_SHARED_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
+set(CMAKE_MODULE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${RISCV_LINKER_FLAGS_EXE} ${CMAKE_EXE_LINKER_FLAGS}")
diff --git a/files/riscv_script/run_qemu.sh b/files/riscv_script/run_qemu.sh
new file mode 100755
index 00000000..080af3b1
--- /dev/null
+++ b/files/riscv_script/run_qemu.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -x
+set -e
+
+USE_RVV="${USE_RVV:-OFF}"
+TOOLCHAIN_PATH="${TOOLCHAIN_PATH:-../../build-toolchain-qemu/riscv-clang}"
+QEMU_PREFIX_PATH="${QEMU_PREFIX_PATH:-../../build-toolchain-qemu/riscv-qemu/}"
+
+if [ "${USE_RVV}" = "ON" ];then
+ QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0 -L ${TOOLCHAIN_PATH}/sysroot"
+else
+ QEMU_OPTION="-cpu rv64,zba=true,zbb=true,zbc=true,zbs=true -L ${TOOLCHAIN_PATH}/sysroot"
+fi
+
+# $QEMU_OPTION is intentionally unquoted so its flags word-split.
+"$QEMU_PREFIX_PATH"/bin/qemu-riscv64 $QEMU_OPTION "$@"
diff --git a/files/source/compare.cc b/files/source/compare.cc
index d4713b60..50a736bd 100644
--- a/files/source/compare.cc
+++ b/files/source/compare.cc
@@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
}
#endif
- while (count >= (uint64_t)(kBlockSize)) {
+ while (count >= (uint64_t)kBlockSize) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
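For reference, HashDjb2 accumulates the classic djb2 recurrence over the buffer; the SIMD path above computes the same result kBlockSize bytes at a time. A scalar equivalent:

  #include <stdint.h>

  // djb2: hash = hash * 33 + byte, starting from 'seed'.
  static uint32_t HashDjb2_Ref(const uint8_t* src, uint64_t count,
                               uint32_t seed) {
    uint32_t hash = seed;
    uint64_t i;
    for (i = 0; i < count; ++i) {
      hash = hash * 33 + src[i];
    }
    return hash;
  }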
@@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
- if (ssim_d == 0.0) {
+ if (ssim_d == 0) {
return DBL_MAX;
}
- return ssim_n * 1.0 / ssim_d;
+ return (double)ssim_n / (double)ssim_d;
}
}
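The ssim_n / ssim_d quotient above is the standard per-block SSIM ratio; in the usual notation (with the count scaling folded into the integer sums):

  SSIM(a, b) = \frac{(2\mu_a\mu_b + c_1)\,(2\sigma_{ab} + c_2)}{(\mu_a^2 + \mu_b^2 + c_1)\,(\sigma_a^2 + \sigma_b^2 + c_2)}

Returning DBL_MAX when the denominator is zero keeps degenerate flat blocks from dividing by zero.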
diff --git a/files/source/compare_gcc.cc b/files/source/compare_gcc.cc
index b834b42a..33cbe25d 100644
--- a/files/source/compare_gcc.cc
+++ b/files/source/compare_gcc.cc
@@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
- return static_cast<uint32_t>(diff);
+ return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
diff --git a/files/source/compare_mmi.cc b/files/source/compare_mmi.cc
deleted file mode 100644
index 7640d946..00000000
--- a/files/source/compare_mmi.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#include "libyuv/compare_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// Hakmem method for hamming distance.
-uint32_t HammingDistance_MMI(const uint8_t* src_a,
- const uint8_t* src_b,
- int count) {
- uint32_t diff = 0u;
-
- uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
- uint64_t c1 = 0x5555555555555555;
- uint64_t c2 = 0x3333333333333333;
- uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
- uint32_t c4 = 0x01010101;
- uint64_t s1 = 1, s2 = 2, s3 = 4;
- __asm__ volatile(
- "1: \n\t"
- "ldc1 %[ta], 0(%[src_a]) \n\t"
- "ldc1 %[tb], 0(%[src_b]) \n\t"
- "xor %[temp], %[ta], %[tb] \n\t"
- "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
- "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
- "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
- "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
- "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
- "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
- "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
- "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
- "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
- "and %[temp1], %[temp1], %[c3] \n\t" //&c3
- "dmfc1 $t0, %[temp1] \n\t"
- "dsrl32 $t0, $t0, 0 \n\t "
- "mul $t0, $t0, %[c4] \n\t"
- "dsrl $t0, $t0, 24 \n\t"
- "dadd %[diff], %[diff], $t0 \n\t"
- "dmfc1 $t0, %[temp1] \n\t"
- "mul $t0, $t0, %[c4] \n\t"
- "dsrl $t0, $t0, 24 \n\t"
- "dadd %[diff], %[diff], $t0 \n\t"
- "daddiu %[src_a], %[src_a], 8 \n\t"
- "daddiu %[src_b], %[src_b], 8 \n\t"
- "addiu %[count], %[count], -8 \n\t"
- "bgtz %[count], 1b \n\t"
- "nop \n\t"
- : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
- [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
- [temp1] "+f"(temp1)
- : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
- [s2] "f"(s2), [s3] "f"(s3)
- : "memory");
- return diff;
-}
-
-uint32_t SumSquareError_MMI(const uint8_t* src_a,
- const uint8_t* src_b,
- int count) {
- uint32_t sse = 0u;
- uint32_t sse_hi = 0u, sse_lo = 0u;
-
- uint64_t src1, src2;
- uint64_t diff, diff_hi, diff_lo;
- uint64_t sse_sum, sse_tmp;
-
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
-
- "1: \n\t"
- "ldc1 %[src1], 0x00(%[src_a]) \n\t"
- "ldc1 %[src2], 0x00(%[src_b]) \n\t"
- "pasubub %[diff], %[src1], %[src2] \n\t"
- "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
- "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
- "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
- "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
- "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
- "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
-
- "daddiu %[src_a], %[src_a], 0x08 \n\t"
- "daddiu %[src_b], %[src_b], 0x08 \n\t"
- "daddiu %[count], %[count], -0x08 \n\t"
- "bnez %[count], 1b \n\t"
-
- "mfc1 %[sse_lo], %[sse_sum] \n\t"
- "mfhc1 %[sse_hi], %[sse_sum] \n\t"
- "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
- : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
- [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
- [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
- [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
- : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
- [mask] "f"(mask)
- : "memory");
-
- return sse;
-}
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/convert.cc b/files/source/convert.cc
index 7178580f..b11ab1bf 100644
--- a/files/source/convert.cc
+++ b/files/source/convert.cc
@@ -24,6 +24,10 @@ namespace libyuv {
extern "C" {
#endif
+// Subsample a dimension using a shift.
+// v is the value to subsample (width or height)
+// a is the amount to add so the result rounds up
+// s is the shift used to subsample down
#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
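A quick worked example of the SUBSAMPLE macro above: with a = 1 and s = 1 it computes a half-size chroma dimension, rounding up in magnitude so an odd luma size still covers the last chroma sample. A standalone sketch that just exercises the macro:

    // Illustrative only: mirrors the SUBSAMPLE macro defined above.
    #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)

    static_assert((SUBSAMPLE(4, 1, 1)) == 2, "even size halves exactly");
    static_assert((SUBSAMPLE(5, 1, 1)) == 3, "odd size rounds up");
    static_assert((SUBSAMPLE(-5, 1, 1)) == -3, "negative sizes round away from zero");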
@@ -199,6 +203,99 @@ static int Planar16bitTo8bit(const uint16_t* src_y,
return 0;
}
+static int I41xToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth) {
+ const int scale = 1 << (24 - depth);
+
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ {
+ const int uv_width = SUBSAMPLE(width, 1, 1);
+ const int uv_height = SUBSAMPLE(height, 1, 1);
+
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
+ height);
+ ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_u,
+ dst_stride_u, src_u, dst_u, scale, kFilterBilinear);
+ ScalePlaneDown2_16To8(width, height, uv_width, uv_height, src_stride_v,
+ dst_stride_v, src_v, dst_v, scale, kFilterBilinear);
+ }
+ return 0;
+}
+
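The scale constant above drives the 16-to-8-bit narrowing: Convert16To8Plane computes roughly (v * scale) >> 16, so with scale = 1 << (24 - depth) a 10-bit sample ends up shifted right by 2 and a 12-bit sample by 4. A small sketch of that arithmetic (the helper name here is illustrative, not a libyuv entry point):

    #include <stdint.h>

    // Narrowing math used with scale = 1 << (24 - depth).
    static inline uint8_t Narrow16To8(uint16_t v, int depth) {
      const int scale = 1 << (24 - depth);  // 16384 for 10 bit, 4096 for 12 bit
      int result = ((int)v * scale) >> 16;  // same as v >> (depth - 8)
      return (uint8_t)(result > 255 ? 255 : result);
    }
    // Narrow16To8(1023, 10) == 255; Narrow16To8(4095, 12) == 255.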
+static int I21xToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int depth) {
+ const int scale = 1 << (24 - depth);
+
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ {
+ const int uv_width = SUBSAMPLE(width, 1, 1);
+ const int uv_height = SUBSAMPLE(height, 1, 1);
+ const int dy = FixedDiv(height, uv_height);
+
+ Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
+ height);
+ ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u,
+ dst_stride_u, src_u, dst_u, 0, 32768, dy,
+ /*bpp=*/1, scale, kFilterBilinear);
+ ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v,
+ dst_stride_v, src_v, dst_v, 0, 32768, dy,
+ /*bpp=*/1, scale, kFilterBilinear);
+ }
+ return 0;
+}
+
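I21xToI420 halves chroma vertically with a fixed-point stepper: FixedDiv(height, uv_height) produces a 16.16 fixed-point row step (dy), and the constant 32768 passed alongside it is 0.5 in the same format, centering the bilinear filter between source rows. A sketch of the step computation (the helper name is illustrative):

    #include <stdint.h>

    // 16.16 fixed-point divide, as used to derive the vertical step dy.
    static inline int FixedDiv16(int num, int div) {
      return (int)(((int64_t)num << 16) / div);
    }
    // FixedDiv16(720, 360) == 0x20000 (2.0): each output chroma row advances
    // two source rows, starting 0.5 rows in (the 32768 offset).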
// Convert 10 bit YUV to 8 bit.
LIBYUV_API
int I010ToI420(const uint16_t* src_y,
@@ -236,38 +333,9 @@ int I210ToI420(const uint16_t* src_y,
int dst_stride_v,
int width,
int height) {
- const int depth = 10;
- const int scale = 1 << (24 - depth);
-
- if (width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- {
- const int uv_width = SUBSAMPLE(width, 1, 1);
- const int uv_height = SUBSAMPLE(height, 1, 1);
- const int dy = FixedDiv(height, uv_height);
-
- Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
- height);
- ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u,
- dst_stride_u, src_u, dst_u, 0, 32768, dy,
- /*bpp=*/1, scale, kFilterBilinear);
- ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v,
- dst_stride_v, src_v, dst_v, 0, 32768, dy,
- /*bpp=*/1, scale, kFilterBilinear);
- }
- return 0;
+ return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 10);
}
LIBYUV_API
@@ -292,6 +360,26 @@ int I210ToI422(const uint16_t* src_y,
}
LIBYUV_API
+int I410ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 10);
+}
+
+LIBYUV_API
int I410ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -355,6 +443,26 @@ int I212ToI422(const uint16_t* src_y,
}
LIBYUV_API
+int I212ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I21xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 12);
+}
+
+LIBYUV_API
int I412ToI444(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -375,6 +483,26 @@ int I412ToI444(const uint16_t* src_y,
0, 12);
}
+LIBYUV_API
+int I412ToI420(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return I41xToI420(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height, 12);
+}
+
// Any Ix10 To I010 format with mirroring.
static int Ix10ToI010(const uint16_t* src_y,
int src_stride_y,
@@ -713,6 +841,110 @@ int MM21ToI420(const uint8_t* src_y,
return 0;
}
+LIBYUV_API
+int MM21ToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height) {
+ if (!src_y || !src_uv || !dst_yuy2 || width <= 0) {
+ return -1;
+ }
+
+ DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2,
+ dst_stride_yuy2, width, height, 32);
+
+ return 0;
+}
+
+// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
+// documentation.
+// TODO(greenjustin): Add an MT2T to I420 conversion.
+LIBYUV_API
+int MT2TToP010(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ if (width <= 0 || !height || !src_uv || !dst_uv) {
+ return -1;
+ }
+
+ {
+ int uv_width = (width + 1) & ~1;
+ int uv_height = (height + 1) / 2;
+ int y = 0;
+ const int tile_width = 16;
+ const int y_tile_height = 32;
+ const int uv_tile_height = 16;
+ int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
+ int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
+ int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
+ size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
+ void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
+ UnpackMT2T_C;
+ align_buffer_64(row_buf, row_buf_size);
+
+#if defined(HAS_UNPACKMT2T_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ UnpackMT2T = UnpackMT2T_NEON;
+ }
+#endif
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ uv_height = (height + 1) / 2;
+ if (dst_y) {
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+ dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
+ dst_stride_uv = -dst_stride_uv;
+ }
+
+ // Unpack and detile Y in rows of tiles
+ if (src_y && dst_y) {
+ for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
+ UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+ width, y_tile_height, y_tile_height);
+ src_y += src_stride_y * y_tile_height;
+ dst_y += dst_stride_y * y_tile_height;
+ }
+ if (height & (y_tile_height - 1)) {
+ UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
+ width, height & (y_tile_height - 1), y_tile_height);
+ }
+ }
+
+ // Unpack and detile UV plane
+ for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
+ UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+ uv_width, uv_tile_height, uv_tile_height);
+ src_uv += src_stride_uv * uv_tile_height;
+ dst_uv += dst_stride_uv * uv_tile_height;
+ }
+ if (uv_height & (uv_tile_height - 1)) {
+ UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
+ DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
+ uv_width, uv_height & (uv_tile_height - 1),
+ uv_tile_height);
+ }
+ free_aligned_buffer_64(row_buf);
+ }
+ return 0;
+}
+
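MT2T stores 10 bits per sample (hence the * 10 / 8 in the tile-row sizes above), and UnpackMT2T expands a row of tiles into MSB-aligned 16-bit samples as P010 expects. The exact bit layout is in the format document linked above; the sketch below only shows the general shape of such an unpack, using a simple hypothetical 4-samples-in-5-bytes packing rather than the real MT2T ordering:

    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical packing for illustration: each 5-byte group holds four
    // consecutive 10-bit samples, most significant bits first. Real MT2T
    // groups bits differently; see the linked format documentation.
    static void Unpack10BitSketch(const uint8_t* src, uint16_t* dst, size_t size) {
      for (size_t i = 0; i + 5 <= size; i += 5) {
        uint64_t bits = 0;
        for (int b = 0; b < 5; ++b) {
          bits = (bits << 8) | src[i + b];  // accumulate 40 bits
        }
        for (int p = 3; p >= 0; --p) {
          dst[p] = (uint16_t)((bits & 0x3FF) << 6);  // MSB-align, as in P010
          bits >>= 10;
        }
        dst += 4;
      }
    }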
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height
@@ -734,7 +966,7 @@ int I422ToNV21(const uint8_t* src_y,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
int halfwidth = (width + 1) >> 1;
@@ -764,11 +996,19 @@ int I422ToNV21(const uint8_t* src_y,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
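The blocks above follow libyuv's standard runtime-dispatch idiom: start from the portable _C row function, switch to the _Any_ variant when the SIMD path is compiled in and the CPU reports the feature (it handles the ragged tail internally), and upgrade to the full-SIMD variant only when the width meets that kernel's alignment. Condensed, using the same names as the surrounding code:

    // Condensed form of the dispatch idiom used throughout these conversions.
    void (*MergeUVRow)(const uint8_t*, const uint8_t*, uint8_t*, int) =
        MergeUVRow_C;                    // portable fallback
    #if defined(HAS_MERGEUVROW_AVX2)
    if (TestCpuFlag(kCpuHasAVX2)) {
      MergeUVRow = MergeUVRow_Any_AVX2;  // any width; tail handled internally
      if (IS_ALIGNED(halfwidth, 16)) {
        MergeUVRow = MergeUVRow_AVX2;    // full-SIMD path for aligned widths
      }
    }
    #endif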
@@ -793,6 +1033,11 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow = MergeUVRow_RVV;
+ }
+#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -833,6 +1078,11 @@ int I422ToNV21(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, halfwidth, height);
@@ -1118,6 +1368,70 @@ int NV16ToNV24(const uint8_t* src_y,
return 0;
}
+// Any biplanar Pxxx format (e.g. P010, P012) to the matching planar Ixxx
+// format with mirroring.
+static int PxxxToIxxx(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ int subsample_x,
+ int subsample_y,
+ int depth) {
+ const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x);
+ const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y);
+ if (width <= 0 || height == 0) {
+ return -1;
+ }
+ ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height,
+ depth);
+ SplitUVPlane_16(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
+ dst_stride_v, uv_width, uv_height, depth);
+ return 0;
+}
+
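At the sample level, PxxxToIxxx is a bit realignment plus a deinterleave: P010 keeps its 10 significant bits in the top of each 16-bit word while I010 keeps them in the bottom, and the interleaved UV plane is split into separate U and V planes (each half width and half height for 4:2:0). A format-level sketch of the per-sample step (the helper name is illustrative):

    #include <stdint.h>

    // P010 -> I010 sample conversion: move the 10 bits from MSB to LSB position.
    static inline uint16_t MsbToLsb10(uint16_t p010_sample) {
      return p010_sample >> 6;  // shift by 16 - depth, i.e. 6 for 10-bit
    }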
+LIBYUV_API
+int P010ToI010(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height, 1, 1, 10);
+}
+
+LIBYUV_API
+int P012ToI012(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_uv,
+ int src_stride_uv,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ return PxxxToIxxx(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
+ dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+ width, height, 1, 1, 12);
+}
+
LIBYUV_API
int P010ToP410(const uint16_t* src_y,
int src_stride_y,
@@ -1231,6 +1545,16 @@ int YUY2ToI420(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -1322,6 +1646,16 @@ int UYVYToI420(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ UYVYToUVRow = UYVYToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ UYVYToUVRow = UYVYToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_UYVYTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
UYVYToYRow = UYVYToYRow_Any_LASX;
@@ -1574,6 +1918,24 @@ int ARGBToI420(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1601,6 +1963,194 @@ int ARGBToI420(const uint8_t* src_argb,
return 0;
}
+#ifdef USE_EXTRACTALPHA
+// Convert ARGB to I420 with Alpha
+// The following version calls ARGBExtractAlpha on the full image.
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
+ dst_stride_u, dst_v, dst_stride_v, width, height);
+ if (r == 0) {
+ r = ARGBExtractAlpha(src_argb, src_stride_argb, dst_a, dst_stride_a, width,
+ height);
+ }
+ return r;
+}
+#else // USE_EXTRACTALPHA
+// Convert ARGB to I420 with Alpha
+LIBYUV_API
+int ARGBToI420Alpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVRow_C;
+ void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYRow_C;
+ void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
+ int width) = ARGBExtractAlphaRow_C;
+ if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+#if defined(HAS_ARGBTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYRow = ARGBToYRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVRow = ARGBToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYRow = ARGBToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVRow = ARGBToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYRow = ARGBToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYRow = ARGBToYRow_Any_MSA;
+ ARGBToUVRow = ARGBToUVRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVRow = ARGBToUVRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ARGBToYRow = ARGBToYRow_Any_LASX;
+ ARGBToUVRow = ARGBToUVRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYRow = ARGBToYRow_LASX;
+ ARGBToUVRow = ARGBToUVRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
+ : ARGBExtractAlphaRow_Any_SSE2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
+ : ARGBExtractAlphaRow_Any_AVX2;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
+ : ARGBExtractAlphaRow_Any_NEON;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
+ : ARGBExtractAlphaRow_Any_MSA;
+ }
+#endif
+#if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
+ : ARGBExtractAlphaRow_Any_LSX;
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
+ ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a,
+ width);
+ src_argb += src_stride_argb * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ dst_a += dst_stride_a * 2;
+ }
+ if (height & 1) {
+ ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToYRow(src_argb, dst_y, width);
+ ARGBExtractAlphaRow(src_argb, dst_a, width);
+ }
+ return 0;
+}
+#endif // USE_EXTRACTALPHA
+
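Both builds of ARGBToI420Alpha share the same contract: one call fills the Y, U, V, and A planes from ARGB. A minimal usage sketch, assuming the declaration added to libyuv/convert.h in this change and the usual I420 layout plus a full-resolution alpha plane:

    #include <stdint.h>
    #include <vector>
    #include "libyuv/convert.h"

    bool ConvertWithAlpha(const uint8_t* argb, int width, int height) {
      int half_w = (width + 1) / 2;
      int half_h = (height + 1) / 2;
      std::vector<uint8_t> y(width * height), a(width * height);
      std::vector<uint8_t> u(half_w * half_h), v(half_w * half_h);
      // Strides equal plane widths here; padded buffers pass real strides.
      return libyuv::ARGBToI420Alpha(argb, width * 4, y.data(), width,
                                     u.data(), half_w, v.data(), half_w,
                                     a.data(), width, width, height) == 0;
    }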
// Convert BGRA to I420.
LIBYUV_API
int BGRAToI420(const uint8_t* src_bgra,
@@ -1628,16 +2178,6 @@ int BGRAToI420(const uint8_t* src_bgra,
src_bgra = src_bgra + (height - 1) * src_stride_bgra;
src_stride_bgra = -src_stride_bgra;
}
-#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
- BGRAToYRow = BGRAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_SSSE3;
- BGRAToYRow = BGRAToYRow_SSSE3;
- }
- }
-#endif
#if defined(HAS_BGRATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
BGRAToYRow = BGRAToYRow_Any_NEON;
@@ -1654,12 +2194,46 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_BGRATOYROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BGRAToYRow = BGRAToYRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToYRow = BGRAToYRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ BGRAToUVRow = BGRAToUVRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BGRAToYRow = BGRAToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToYRow = BGRAToYRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ BGRAToUVRow = BGRAToUVRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToUVRow = BGRAToUVRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
BGRAToYRow = BGRAToYRow_Any_MSA;
BGRAToUVRow = BGRAToUVRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
BGRAToYRow = BGRAToYRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
BGRAToUVRow = BGRAToUVRow_MSA;
}
}
@@ -1674,6 +2248,19 @@ int BGRAToI420(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_BGRATOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ BGRAToYRow = BGRAToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ BGRAToYRow = BGRAToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_BGRATOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ BGRAToYRow = BGRAToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
@@ -1786,6 +2373,19 @@ int ABGRToI420(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@@ -1882,6 +2482,19 @@ int RGBAToI420(const uint8_t* src_rgba,
}
}
#endif
+#if defined(HAS_RGBATOYROW_LASX)
+  if (TestCpuFlag(kCpuHasLASX)) {
+ RGBAToYRow = RGBAToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYRow = RGBAToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToYRow = RGBAToYRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@@ -1901,7 +2514,7 @@ int RGBAToI420(const uint8_t* src_rgba,
// Enabled if 1 pass is available
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \
- defined(HAS_RGB24TOYROW_LSX))
+ defined(HAS_RGB24TOYROW_LSX) || defined(HAS_RGB24TOYROW_RVV))
#define HAS_RGB24TOYROW
#endif
@@ -1986,6 +2599,11 @@ int RGB24ToI420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYRow = RGB24ToYRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYROW
@@ -2035,8 +2653,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
{
#if !defined(HAS_RGB24TOYROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
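The rename from kRowSize to row_size does not change the math: (width * 4 + 31) & ~31 rounds the ARGB row byte count up to the next multiple of 32, keeping the two temporary rows SIMD-friendly. For instance:

    // Round an ARGB row byte count up to a multiple of 32.
    static inline int RowSize32(int width) {
      return (width * 4 + 31) & ~31;
    }
    // RowSize32(7) == 32 (28 bytes padded), RowSize32(8) == 32, RowSize32(9) == 64.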
@@ -2046,10 +2664,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
@@ -2075,7 +2693,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
#undef HAS_RGB24TOYROW
// Enabled if 1 pass is available
-#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA)
+#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_RVV)
#define HAS_RGB24TOYJROW
#endif
@@ -2140,6 +2759,27 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGB24ToYJRow = RGB24ToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYJRow = RGB24ToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else // HAS_RGB24TOYJROW
@@ -2189,8 +2829,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
{
#if !defined(HAS_RGB24TOYJROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2200,10 +2840,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
+ ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
@@ -2230,7 +2870,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
// Enabled if 1 pass is available
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
- defined(HAS_RAWTOYROW_LSX))
+ defined(HAS_RAWTOYROW_LSX) || defined(HAS_RAWTOYROW_RVV))
#define HAS_RAWTOYROW
#endif
@@ -2314,6 +2954,11 @@ int RAWToI420(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYRow = RAWToYRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYROW
@@ -2363,8 +3008,8 @@ int RAWToI420(const uint8_t* src_raw,
{
#if !defined(HAS_RAWTOYROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2374,10 +3019,10 @@ int RAWToI420(const uint8_t* src_raw,
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2403,7 +3048,8 @@ int RAWToI420(const uint8_t* src_raw,
#undef HAS_RAWTOYROW
// Enabled if 1 pass is available
-#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA)
+#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
+ defined(HAS_RAWTOYJROW_RVV)
#define HAS_RAWTOYJROW
#endif
@@ -2468,6 +3114,27 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@@ -2517,8 +3184,8 @@ int RAWToJ420(const uint8_t* src_raw,
{
#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2528,10 +3195,10 @@ int RAWToJ420(const uint8_t* src_raw,
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2695,8 +3362,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -2706,10 +3373,10 @@ int RGB565ToI420(const uint8_t* src_rgb565,
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
#else
RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_rgb565 += src_stride_rgb565 * 2;
dst_y += dst_stride_y * 2;
@@ -2875,8 +3542,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -2888,11 +3555,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
width);
#else
ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+ ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size,
width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_argb1555 += src_stride_argb1555 * 2;
dst_y += dst_stride_y * 2;
@@ -3055,6 +3722,24 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -3070,8 +3755,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
{
#if !(defined(HAS_ARGB4444TOYROW_NEON))
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
@@ -3082,11 +3767,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
width);
#else
ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+ ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size,
width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_argb4444 += src_stride_argb4444 * 2;
dst_y += dst_stride_y * 2;
@@ -3167,6 +3852,27 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGB24ToYJRow = RGB24ToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGB24TOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToYJRow = RGB24ToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RGB24ToYJRow(src_rgb24, dst_yj, width);
@@ -3235,6 +3941,27 @@ int RAWToJ400(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToYJRow(src_raw, dst_yj, width);
diff --git a/files/source/convert_argb.cc b/files/source/convert_argb.cc
index 71ef8c10..cc6560de 100644
--- a/files/source/convert_argb.cc
+++ b/files/source/convert_argb.cc
@@ -7,8 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+
#include "libyuv/convert_argb.h"
+#include <assert.h>
+
+#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
@@ -65,6 +69,7 @@ int I420ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -115,6 +120,14 @@ int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -123,6 +136,11 @@ int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -298,6 +316,7 @@ int I422ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -355,6 +374,14 @@ int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -363,6 +390,11 @@ int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -536,6 +568,7 @@ int I444ToARGBMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -592,6 +625,11 @@ int I444ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -747,6 +785,133 @@ int U444ToABGR(const uint8_t* src_y,
width, height);
}
+// Convert I444 to RGB24 with matrix.
+LIBYUV_API
+int I444ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+ // Coalesce rows.
+ if (src_stride_y == width && src_stride_u == width && src_stride_v == width &&
+ dst_stride_rgb24 == width * 3) {
+ width *= height;
+ height = 1;
+ src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_LSX;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I444ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
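The coalesce step in I444ToRGB24Matrix above is a recurring libyuv trick: when every plane's stride equals its visible width, the image is one contiguous run of pixels, so it can be processed as a single row of width * height pixels with the per-row dispatch overhead paid once. Conceptually:

    // Row-coalescing condition for a 4:4:4 to RGB24 conversion: no plane
    // has row padding, so the whole image can be treated as one long row.
    static bool CanCoalesce(int stride_y, int stride_u, int stride_v,
                            int stride_rgb24, int width) {
      return stride_y == width && stride_u == width && stride_v == width &&
             stride_rgb24 == width * 3;
    }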
+// Convert I444 to RGB24.
+LIBYUV_API
+int I444ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I444ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I444 to RAW.
+LIBYUV_API
+int I444ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I444ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
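I444ToRAW leans on a channel-swap identity instead of a dedicated row path: RAW is RGB24 with R and B exchanged, and passing V where U is expected while using the YVU-ordered matrix swaps exactly those two output channels. Under that assumption, these two calls write identical bytes:

    #include <stdint.h>
    #include "libyuv/convert_argb.h"

    void RawVsSwappedRgb24(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                           uint8_t* raw, int w, int h) {
      libyuv::I444ToRAW(y, w, u, w, v, w, raw, w * 3, w, h);
      libyuv::I444ToRGB24Matrix(y, w, /*src_u=*/v, w, /*src_v=*/u, w,
                                raw, w * 3, &libyuv::kYvuI601Constants, w, h);
    }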
// Convert 10 bit YUV to ARGB with matrix.
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
// multiply 10 bit yuv into high bits to allow any number of bits.
@@ -767,6 +932,7 @@ int I010ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -926,6 +1092,7 @@ int I012ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I212ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -983,6 +1150,7 @@ int I210ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1137,6 +1305,7 @@ int I410ToAR30Matrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1190,6 +1359,7 @@ int I010ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1353,6 +1523,7 @@ int I012ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I212ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1408,6 +1579,7 @@ int I210ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1568,6 +1740,7 @@ int I410ToARGBMatrix(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1617,6 +1790,7 @@ int P010ToARGBMatrix(const uint16_t* src_y,
void (*P210ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1667,6 +1841,7 @@ int P210ToARGBMatrix(const uint16_t* src_y,
void (*P210ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -1715,6 +1890,7 @@ int P010ToAR30Matrix(const uint16_t* src_y,
void (*P210ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1765,6 +1941,7 @@ int P210ToAR30Matrix(const uint16_t* src_y,
void (*P210ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -1823,6 +2000,7 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -1865,6 +2043,14 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -1873,6 +2059,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -1905,6 +2096,11 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -1947,6 +2143,7 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -1989,6 +2186,14 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -1997,6 +2202,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -2029,6 +2239,11 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2069,6 +2284,7 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2111,6 +2327,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -2143,6 +2364,11 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I444AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2312,6 +2538,7 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I210AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2370,6 +2597,11 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2412,6 +2644,7 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I210AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2470,6 +2703,11 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I210AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2510,6 +2748,7 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y,
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -2568,6 +2807,11 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I410AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
@@ -2597,6 +2841,7 @@ int I400ToARGBMatrix(const uint8_t* src_y,
void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I400ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -2652,6 +2897,11 @@ int I400ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I400TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I400ToARGBRow = I400ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, yuvconstants, width);
@@ -2739,6 +2989,12 @@ int J400ToARGB(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_J400TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ J400ToARGBRow = J400ToARGBRow_RVV;
+ }
+#endif
+
for (y = 0; y < height; ++y) {
J400ToARGBRow(src_y, dst_argb, width);
src_y += src_stride_y;
@@ -2901,6 +3157,11 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
}
}
#endif
+#if defined(HAS_RGB24TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
@@ -2976,6 +3237,11 @@ int RAWToARGB(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToARGBRow = RAWToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
@@ -3027,6 +3293,11 @@ int RAWToRGBA(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToRGBARow = RAWToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGBARow(src_raw, dst_rgba, width);
@@ -3431,6 +3702,11 @@ int AR64ToARGB(const uint16_t* src_ar64,
}
}
#endif
+#if defined(HAS_AR64TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AR64ToARGBRow = AR64ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
AR64ToARGBRow(src_ar64, dst_argb, width);
@@ -3490,6 +3766,11 @@ int AB64ToARGB(const uint16_t* src_ab64,
}
}
#endif
+#if defined(HAS_AB64TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AB64ToARGBRow = AB64ToARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
AB64ToARGBRow(src_ab64, dst_argb, width);
@@ -3514,6 +3795,7 @@ int NV12ToARGBMatrix(const uint8_t* src_y,
void (*NV12ToARGBRow)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3598,6 +3880,7 @@ int NV21ToARGBMatrix(const uint8_t* src_y,
void (*NV21ToARGBRow)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
+ assert(yuvconstants);
if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -3741,6 +4024,7 @@ int NV12ToRGB24Matrix(const uint8_t* src_y,
void (*NV12ToRGB24Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -3801,6 +4085,7 @@ int NV21ToRGB24Matrix(const uint8_t* src_y,
void (*NV21ToRGB24Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -4143,6 +4428,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
const ptrdiff_t vu_off = src_v - src_u;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -4243,6 +4529,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
return -1;
}
@@ -4284,6 +4571,14 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4292,6 +4587,11 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGBARow = I422ToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
@@ -4354,6 +4654,7 @@ int NV12ToRGB565Matrix(const uint8_t* src_y,
void (*NV12ToRGB565Row)(
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -4456,6 +4757,7 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGBARow_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
return -1;
}
@@ -4497,6 +4799,14 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4505,6 +4815,11 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGBARow = I422ToRGBARow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
@@ -4572,6 +4887,7 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB24Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
@@ -4613,6 +4929,14 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
@@ -4621,6 +4945,11 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGB24Row = I422ToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
@@ -4742,6 +5071,134 @@ int H420ToRAW(const uint8_t* src_y,
width, height);
}
+// Convert I422 to RGB24 with specified color matrix.
+LIBYUV_API
+int I422ToRGB24Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I422ToRGB24Row_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I422TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToRGB24Row = I422ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I422ToRGB24Row = I422ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ I422ToRGB24Row = I422ToRGB24Row_LASX;
+ }
+ }
+#endif
+#if defined(HAS_I422TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToRGB24Row = I422ToRGB24Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+ return 0;
+}
+
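+// A minimal usage sketch (illustrative only; plane allocation is assumed):
+// the yuvconstants argument selects the colorspace, e.g. BT.601 via
+// kYuvI601Constants or BT.709 via kYuvH709Constants:
+//   I422ToRGB24Matrix(y, y_stride, u, u_stride, v, v_stride,
+//                     rgb24, rgb24_stride, &kYuvH709Constants, width, height);
+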
+// Convert I422 to RGB24.
+LIBYUV_API
+int I422ToRGB24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ int width,
+ int height) {
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ &kYuvI601Constants, width, height);
+}
+
+// Convert I422 to RAW.
+LIBYUV_API
+int I422ToRAW(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_raw,
+ int dst_stride_raw,
+ int width,
+ int height) {
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_v,
+ src_stride_v, // Swap U and V
+ src_u, src_stride_u, dst_raw, dst_stride_raw,
+ &kYvuI601Constants, // Use Yvu matrix
+ width, height);
+}
+
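+// Note: RAW is RGB24 with the R and B bytes exchanged, so I422ToRAW above
+// reuses I422ToRGB24Matrix by swapping the U and V planes and selecting the
+// YVU form of the matrix. Equivalent direct call (illustrative only):
+//   I422ToRGB24Matrix(y, y_stride, v, v_stride, u, u_stride,
+//                     raw, raw_stride, &kYvuI601Constants, width, height);
+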
// Convert I420 to ARGB1555.
LIBYUV_API
int I420ToARGB1555(const uint8_t* src_y,
@@ -4801,6 +5258,14 @@ int I420ToARGB1555(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB1555ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB1555ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_LASX;
@@ -4882,6 +5347,14 @@ int I420ToARGB4444(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB4444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB4444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_LASX;
@@ -4922,6 +5395,7 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -4963,6 +5437,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5035,23 +5517,25 @@ int H420ToRGB565(const uint8_t* src_y,
&kYuvH709Constants, width, height);
}
-// Convert I422 to RGB565.
+// Convert I422 to RGB565 with specified color matrix.
LIBYUV_API
-int I422ToRGB565(const uint8_t* src_y,
- int src_stride_y,
- const uint8_t* src_u,
- int src_stride_u,
- const uint8_t* src_v,
- int src_stride_v,
- uint8_t* dst_rgb565,
- int dst_stride_rgb565,
- int width,
- int height) {
+int I422ToRGB565Matrix(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
int y;
void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToRGB565Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
@@ -5093,6 +5577,14 @@ int I422ToRGB565(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5103,7 +5595,7 @@ int I422ToRGB565(const uint8_t* src_y,
#endif
for (y = 0; y < height; ++y) {
- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -5112,6 +5604,23 @@ int I422ToRGB565(const uint8_t* src_y,
return 0;
}
+// Convert I422 to RGB565.
+LIBYUV_API
+int I422ToRGB565(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb565,
+ int dst_stride_rgb565,
+ int width,
+ int height) {
+ return I422ToRGB565Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb565, dst_stride_rgb565,
+ &kYuvI601Constants, width, height);
+}
+
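+// RGB565 packs each pixel into 16 bits: blue in bits 0-4, green in bits 5-10,
+// red in bits 11-15. The ordered dither below adds a small per-pixel bias
+// before truncation to reduce banding, e.g. a green sample of 200 with dither
+// value 3 becomes (200 + 3) >> 2 = 50 in 6 bits (illustrative arithmetic;
+// values are clamped to 255 first).
+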
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
static const uint8_t kDither565_4x4[16] = {
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
@@ -5136,7 +5645,7 @@ int I420ToRGB565Dither(const uint8_t* src_y,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
- const uint32_t dither4, int width) =
+ uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -5191,6 +5700,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -5199,6 +5716,11 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
@@ -5231,6 +5753,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX;
@@ -5278,6 +5808,7 @@ int I420ToAR30Matrix(const uint8_t* src_y,
const struct YuvConstants* yuvconstants, int width) =
I422ToAR30Row_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5401,9 +5932,12 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
- void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
- uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_Any_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5453,48 +5987,57 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row;
- uint8_t* temp_u_2 = row + kRowSize;
- uint8_t* temp_v_1 = row + kRowSize * 2;
- uint8_t* temp_v_2 = row + kRowSize * 3;
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5506,8 +6049,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
}
@@ -5531,8 +6074,9 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
- void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
- ScaleRowUp2_Linear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5582,36 +6126,41 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToARGBRow = I444ToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row;
- uint8_t* temp_v = row + kRowSize;
+ uint8_t* temp_v = row + row_size;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I444ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5623,6 +6172,148 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
return 0;
}
+static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_MSA;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_MSA;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_LASX;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+
+ // Allocate 4 rows of temp memory; row_size rounds width up to a multiple
+ // of 32.
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
+ uint8_t* temp_u_1 = row;
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
+
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+
+ for (y = 0; y < height - 2; y += 2) {
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ I444ToRGB24Row(src_y, temp_u_2, temp_v_2, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+
+ if (!(height & 1)) {
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
+ I444ToRGB24Row(src_y, temp_u_1, temp_v_1, dst_rgb24, yuvconstants, width);
+ }
+
+ free_aligned_buffer_64(row);
+ return 0;
+}
+
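+// The bilinear path above doubles the chroma planes in both directions: the
+// first and last output rows have no second source row, so they use the
+// horizontal-only linear kernel, while each interior pair of output rows is
+// produced by one Scale2RowUp_Bilinear call writing temp_u_1/temp_u_2 (and
+// temp_v_1/temp_v_2) with stride row_size before the I444 row conversion.
+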
static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -5639,9 +6330,12 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5668,41 +6362,44 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -5714,8 +6411,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
}
@@ -5740,8 +6437,9 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToAR30Row_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -5770,29 +6468,29 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v, width);
I410ToAR30Row(src_y, temp_u, temp_v, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -5819,9 +6517,12 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5848,41 +6549,44 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -5894,8 +6598,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
}
@@ -5919,8 +6623,9 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
const uint16_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I410ToARGBRow_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -5949,29 +6654,29 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v, width);
I410ToARGBRow(src_y, temp_u, temp_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6006,9 +6711,12 @@ static int I420AlphaToARGBMatrixBilinear(
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
- uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_Any_C;
+ void (*Scale2RowUp_Bilinear)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, ptrdiff_t dst_stride,
+ int dst_width) = ScaleRowUp2_Bilinear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6059,6 +6767,11 @@ static int I420AlphaToARGBMatrixBilinear(
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -6091,40 +6804,50 @@ static int I420AlphaToARGBMatrixBilinear(
}
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
+
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
+ Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row;
- uint8_t* temp_u_2 = row + kRowSize;
- uint8_t* temp_v_1 = row + kRowSize * 2;
- uint8_t* temp_v_2 = row + kRowSize * 3;
+ uint8_t* temp_u_2 = row + row_size;
+ uint8_t* temp_v_1 = row + row_size * 2;
+ uint8_t* temp_v_2 = row + row_size * 3;
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6135,8 +6858,8 @@ static int I420AlphaToARGBMatrixBilinear(
src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear(src_v, src_stride_v, temp_v_1, row_size, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6158,8 +6881,8 @@ static int I420AlphaToARGBMatrixBilinear(
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear(src_v, temp_v_1, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6193,8 +6916,9 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
int width) = I444AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
- ScaleRowUp2_Linear_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6245,6 +6969,11 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I444ALPHATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444AlphaToARGBRow = I444AlphaToARGBRow_RVV;
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
@@ -6277,36 +7006,42 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
+
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row;
- uint8_t* temp_v = row + kRowSize;
+ uint8_t* temp_v = row + row_size;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I444AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6346,9 +7081,12 @@ static int I010AlphaToARGBMatrixBilinear(
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_12)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleRowUp2_Bilinear_16_Any_C;
+ void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6407,35 +7145,43 @@ static int I010AlphaToARGBMatrixBilinear(
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_SSSE3;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_AVX2;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
+#if defined(HAS_SCALEROWUP2_BILINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
+ Scale2RowUp_Bilinear_12 = ScaleRowUp2_Bilinear_12_Any_NEON;
+ ScaleRowUp2_Linear_12 = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 4 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row);
- uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize;
- uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2;
- uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3;
+ uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
+ uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
+ uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6446,8 +7192,8 @@ static int I010AlphaToARGBMatrixBilinear(
src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width);
+ Scale2RowUp_Bilinear_12(src_u, src_stride_u, temp_u_1, row_size, width);
+ Scale2RowUp_Bilinear_12(src_v, src_stride_v, temp_v_1, row_size, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6469,8 +7215,8 @@ static int I010AlphaToARGBMatrixBilinear(
}
if (!(height & 1)) {
- Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width);
- Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width);
+ ScaleRowUp2_Linear_12(src_u, temp_u_1, width);
+ ScaleRowUp2_Linear_12(src_v, temp_v_1, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6504,8 +7250,9 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
int width) = I410AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
int width) = ARGBAttenuateRow_C;
- void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
- int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_ptr, uint16_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 ||
height == 0) {
return -1;
@@ -6564,32 +7311,37 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_12_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_12_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row);
- uint16_t* temp_v = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_u, temp_u, width);
- ScaleRowUp(src_v, temp_v, width);
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
I410AlphaToARGBRow(src_y, temp_u, temp_v, src_a, dst_argb, yuvconstants,
width);
if (attenuate) {
@@ -6618,9 +7370,10 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
void (*P410ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleUVRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_16)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -6649,35 +7402,35 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row);
- uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6688,7 +7441,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
}
@@ -6709,8 +7462,9 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
void (*P410ToARGBRow)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToARGBRow_C;
- void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
- ScaleUVRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv,
+ int dst_width) = ScaleUVRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -6739,28 +7493,28 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row);
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_uv, temp_uv, width);
+ ScaleRowUp2_Linear(src_uv, temp_uv, width);
P410ToARGBRow(src_y, temp_uv, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
@@ -6784,9 +7538,10 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
void (*P410ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C;
- void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
- uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
- ScaleUVRowUp2_Bilinear_16_Any_C;
+ void (*Scale2RowUp_Bilinear_16)(
+ const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr,
+ ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -6815,35 +7570,35 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
+ Scale2RowUp_Bilinear_16 = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
#endif
// alloc 2 lines temp
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row);
- uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize;
+ uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
- Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -6854,7 +7609,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
}
if (!(height & 1)) {
- Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width);
+ Scale2RowUp_Bilinear_16(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
}
@@ -6875,8 +7630,9 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
void (*P410ToAR30Row)(
const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) = P410ToAR30Row_C;
- void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
- ScaleUVRowUp2_Linear_16_Any_C;
+ void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv,
+ int dst_width) = ScaleUVRowUp2_Linear_16_Any_C;
+ assert(yuvconstants);
if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) {
return -1;
}
@@ -6905,28 +7661,28 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
+ ScaleRowUp2_Linear = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
- const int kRowSize = (2 * width + 31) & ~31;
- align_buffer_64(row, kRowSize * sizeof(uint16_t));
+ const int row_size = (2 * width + 31) & ~31;
+ align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row);
for (y = 0; y < height; ++y) {
- ScaleRowUp(src_uv, temp_uv, width);
+ ScaleRowUp2_Linear(src_uv, temp_uv, width);
P410ToAR30Row(src_y, temp_uv, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30;
src_y += src_stride_y;
@@ -6937,6 +7693,133 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
return 0;
}
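+// Note: the P010/P210 paths above size their temp rows as
+// (2 * width + 31) & ~31 because the UV plane is interleaved, so one
+// upsampled row holds two samples (U and V) per pixel.
+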
+static int I422ToRGB24MatrixLinear(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height) {
+ int y;
+ void (*I444ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf,
+ const uint8_t* v_buf, uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants, int width) =
+ I444ToRGB24Row_C;
+ void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr,
+ int dst_width) = ScaleRowUp2_Linear_Any_C;
+ assert(yuvconstants);
+ if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+ dst_stride_rgb24 = -dst_stride_rgb24;
+ }
+#if defined(HAS_I444TORGB24ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ I444ToRGB24Row = I444ToRGB24Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ I444ToRGB24Row = I444ToRGB24Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ I444ToRGB24Row = I444ToRGB24Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ I444ToRGB24Row = I444ToRGB24Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_I444TORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I444ToRGB24Row = I444ToRGB24Row_RVV;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSE2;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_SSSE3;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_AVX2;
+ }
+#endif
+#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
+ }
+#endif
+
+ // Allocate 2 rows of temp memory; row_size rounds width up to a multiple
+ // of 32.
+ const int row_size = (width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
+ uint8_t* temp_u = row;
+ uint8_t* temp_v = row + row_size;
+
+ for (y = 0; y < height; ++y) {
+ ScaleRowUp2_Linear(src_u, temp_u, width);
+ ScaleRowUp2_Linear(src_v, temp_v, width);
+ I444ToRGB24Row(src_y, temp_u, temp_v, dst_rgb24, yuvconstants, width);
+ dst_rgb24 += dst_stride_rgb24;
+ src_y += src_stride_y;
+ src_u += src_stride_u;
+ src_v += src_stride_v;
+ }
+
+ free_aligned_buffer_64(row);
+ return 0;
+}
+
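+// I422 chroma is subsampled 2x horizontally only, so the linear path above
+// needs no vertical filtering: each row widens U and V once with
+// ScaleRowUp2_Linear and then converts at full resolution with the I444 row
+// function.
+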
+LIBYUV_API
+int I422ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter) {
+ switch (filter) {
+ case kFilterNone:
+ return I422ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ yuvconstants, width, height);
+ case kFilterBilinear:
+ case kFilterBox:
+ case kFilterLinear:
+ return I422ToRGB24MatrixLinear(
+ src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
+ dst_rgb24, dst_stride_rgb24, yuvconstants, width, height);
+ }
+
+ return -1;
+}
+
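+// Usage sketch (illustrative only): kFilterNone keeps nearest-neighbor
+// chroma, while the other modes upsample chroma horizontally first:
+//   I422ToRGB24MatrixFilter(y, y_stride, u, u_stride, v, v_stride,
+//                           rgb24, rgb24_stride, &kYuvI601Constants,
+//                           width, height, kFilterBilinear);
+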
LIBYUV_API
int I420ToARGBMatrixFilter(const uint8_t* src_y,
int src_stride_y,
@@ -6998,6 +7881,35 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
}
LIBYUV_API
+int I420ToRGB24MatrixFilter(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_rgb24,
+ int dst_stride_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width,
+ int height,
+ enum FilterMode filter) {
+ switch (filter) {
+ case kFilterNone:
+ return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_rgb24, dst_stride_rgb24,
+ yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
+ case kFilterBilinear:
+ case kFilterBox:
+ return I420ToRGB24MatrixBilinear(
+ src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
+ dst_rgb24, dst_stride_rgb24, yuvconstants, width, height);
+ }
+
+ return -1;
+}
+
+LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
@@ -7015,13 +7927,12 @@ int I010ToAR30MatrixFilter(const uint16_t* src_y,
return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_ar30, dst_stride_ar30,
yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010ToAR30MatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_ar30, dst_stride_ar30, yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7074,13 +7985,12 @@ int I010ToARGBMatrixFilter(const uint16_t* src_y,
return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_argb, dst_stride_argb,
yuvconstants, width, height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010ToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_argb, dst_stride_argb, yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7137,14 +8047,13 @@ int I420AlphaToARGBMatrixFilter(const uint8_t* src_y,
src_v, src_stride_v, src_a, src_stride_a,
dst_argb, dst_stride_argb, yuvconstants,
width, height, attenuate);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I420AlphaToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a,
src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height,
attenuate);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7206,14 +8115,13 @@ int I010AlphaToARGBMatrixFilter(const uint16_t* src_y,
src_v, src_stride_v, src_a, src_stride_a,
dst_argb, dst_stride_argb, yuvconstants,
width, height, attenuate);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return I010AlphaToARGBMatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a,
src_stride_a, dst_argb, dst_stride_argb, yuvconstants, width, height,
attenuate);
- case kFilterLinear:
- return -1;
}
return -1;
@@ -7253,6 +8161,8 @@ int I210AlphaToARGBMatrixFilter(const uint16_t* src_y,
return -1;
}
+// TODO(fb): Verify this function works correctly. P010 is like NV12 but
+// 10 bit, with a biplanar (interleaved) UV plane.
LIBYUV_API
int P010ToARGBMatrixFilter(const uint16_t* src_y,
int src_stride_y,
@@ -7269,13 +8179,12 @@ int P010ToARGBMatrixFilter(const uint16_t* src_y,
return P010ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv,
dst_argb, dst_stride_argb, yuvconstants, width,
height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return P010ToARGBMatrixBilinear(src_y, src_stride_y, src_uv,
src_stride_uv, dst_argb, dst_stride_argb,
yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
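For the TODO above: P010 is biplanar — a full-size Y plane of uint16_t samples (the 10 significant bits sit in the high bits) followed by a half-height plane of interleaved UV pairs. A hypothetical helper, assuming the packed layout and strides counted in uint16_t elements, as the pointer math elsewhere in this file does:

#include <stddef.h>
#include <stdint.h>
// Locate the interleaved UV plane of a packed P010 buffer (sketch only;
// real callers pass the planes separately, as the API above does).
static const uint16_t* P010UVPlane(const uint16_t* base,
                                   int src_stride_y,
                                   int height) {
  return base + (ptrdiff_t)src_stride_y * height;  // UV follows Y
}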
@@ -7324,13 +8233,12 @@ int P010ToAR30MatrixFilter(const uint16_t* src_y,
return P010ToAR30Matrix(src_y, src_stride_y, src_uv, src_stride_uv,
dst_ar30, dst_stride_ar30, yuvconstants, width,
height);
+ case kFilterLinear: // TODO(fb): Implement Linear using Bilinear stride 0
case kFilterBilinear:
case kFilterBox:
return P010ToAR30MatrixBilinear(src_y, src_stride_y, src_uv,
src_stride_uv, dst_ar30, dst_stride_ar30,
yuvconstants, width, height);
- case kFilterLinear:
- return -1;
}
return -1;
diff --git a/files/source/convert_from.cc b/files/source/convert_from.cc
index 8bd07e4c..4102d610 100644
--- a/files/source/convert_from.cc
+++ b/files/source/convert_from.cc
@@ -446,6 +446,14 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
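Each of these blocks follows the same runtime-dispatch idiom. A self-contained schematic — the FooRow names are placeholders, not libyuv symbols: the _C kernel is the portable fallback, the _Any_ variant accepts any width by handling the ragged tail itself, and the bare SIMD kernel requires a full vector multiple:

#include <stdint.h>
static void FooRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) dst[i] = src[i];  // stand-in row op
}
// Pick the best kernel for this CPU and width.
static void (*PickFooRow(int width))(const uint8_t*, uint8_t*, int) {
  void (*row)(const uint8_t*, uint8_t*, int) = FooRow_C;
  (void)width;  // used only when a SIMD path is compiled in
#if defined(HAS_FOOROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    row = FooRow_Any_LSX;  // any width; tail handled internally
    if (IS_ALIGNED(width, 16)) {
      row = FooRow_LSX;    // full 16-pixel vectors only
    }
  }
#endif
  return row;
}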
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -533,6 +541,14 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
@@ -608,6 +624,14 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
diff --git a/files/source/convert_from_argb.cc b/files/source/convert_from_argb.cc
index e50c2af3..c3d037c4 100644
--- a/files/source/convert_from_argb.cc
+++ b/files/source/convert_from_argb.cc
@@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOUV444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToUV444Row = ARGBToUV444Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUV444Row = ARGBToUV444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOUV444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToUV444Row = ARGBToUV444Row_Any_LASX;
@@ -116,6 +124,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -124,6 +140,11 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToUV444Row(src_argb, dst_u, dst_v, width);
@@ -230,7 +251,24 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
-
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -241,6 +279,11 @@ int ARGBToI422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
@@ -340,6 +383,14 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -350,6 +401,11 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -361,11 +417,19 @@ int ARGBToNV12(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 32)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -390,6 +454,11 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
    // Allocate temporary rows of U and V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
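The (x + 31) & ~31 idiom rounds up to the next multiple of 32 so each temp row starts on a 32-byte boundary; for example, halfwidth 100 rounds to 128, and 128 stays 128:

// Round x up to the next multiple of 32 (restatement of the idiom above).
static int RoundUp32(int x) {
  return (x + 31) & ~31;
}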
@@ -502,6 +571,24 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -512,6 +599,11 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -523,11 +615,19 @@ int ARGBToNV21(const uint8_t* src_argb,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -552,6 +652,11 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
    // Allocate temporary rows of U and V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
@@ -663,6 +768,27 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYRow = ABGRToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -674,11 +800,19 @@ int ABGRToNV12(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -703,6 +837,11 @@ int ABGRToNV12(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
// Allocate a rows of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
@@ -815,6 +954,27 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_ABGRTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYRow = ABGRToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYRow = ABGRToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYRow = ABGRToYRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYRow = ABGRToYRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYRow = ABGRToYRow_RVV;
+ }
+#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
@@ -826,11 +986,19 @@ int ABGRToNV21(const uint8_t* src_abgr,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -855,6 +1023,11 @@ int ABGRToNV21(const uint8_t* src_abgr,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
    // Allocate temporary rows of U and V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
@@ -972,6 +1145,24 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -982,6 +1173,11 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
@@ -1014,6 +1210,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -1135,6 +1339,24 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ ARGBToUVRow = ARGBToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ ARGBToUVRow = ARGBToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1145,6 +1367,11 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
@@ -1177,6 +1404,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
@@ -1262,6 +1497,14 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToYRow = ARGBToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYRow = ARGBToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYRow = ARGBToYRow_Any_LASX;
@@ -1270,6 +1513,11 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYRow = ARGBToYRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToYRow(src_argb, dst_y, width);
@@ -1360,6 +1608,14 @@ int ARGBToRGB24(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX;
@@ -1368,6 +1624,11 @@ int ARGBToRGB24(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRGB24Row = ARGBToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToRGB24Row(src_argb, dst_rgb24, width);
@@ -1434,6 +1695,14 @@ int ARGBToRAW(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORAWROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRAWRow = ARGBToRAWRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToRAWRow = ARGBToRAWRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORAWROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRAWRow = ARGBToRAWRow_Any_LASX;
@@ -1442,6 +1711,11 @@ int ARGBToRAW(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORAWROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRAWRow = ARGBToRAWRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBToRAWRow(src_argb, dst_raw, width);
@@ -1467,7 +1741,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
int height) {
int y;
void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb,
- const uint32_t dither4, int width) =
+ uint32_t dither4, int width) =
ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
@@ -1512,6 +1786,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX;
@@ -1589,6 +1871,15 @@ int ARGBToRGB565(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToRGB565Row = ARGBToRGB565Row_LSX;
+ }
+ }
+#endif
+
#if defined(HAS_ARGBTORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_LASX;
@@ -1663,6 +1954,14 @@ int ARGBToARGB1555(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOARGB1555ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB1555Row = ARGBToARGB1555Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOARGB1555ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LASX;
@@ -1737,6 +2036,14 @@ int ARGBToARGB4444(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOARGB4444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToARGB4444Row = ARGBToARGB4444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOARGB4444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LASX;
@@ -1858,19 +2165,19 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
- if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -1879,6 +2186,22 @@ int ARGBToJ420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
+#if defined(HAS_ARGBTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_NEON;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
@@ -1903,19 +2226,11 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
+#if defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
@@ -1951,18 +2266,23 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height - 1; y += 2) {
- ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
src_argb += src_stride_argb * 2;
dst_yj += dst_stride_yj * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
if (height & 1) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
}
return 0;
@@ -1974,19 +2294,19 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
- uint8_t* dst_u,
- int dst_stride_u,
- uint8_t* dst_v,
- int dst_stride_v,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
- if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
+ if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@@ -1997,10 +2317,10 @@ int ARGBToJ422(const uint8_t* src_argb,
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_yj == width &&
- dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
+ dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
+ src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
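Worked example of the coalesce step above: a 64x4 ARGB image stored with src_stride_argb == 64 * 4 == 256 and packed destination planes becomes width 256, height 1, all strides 0, so a single kernel invocation covers the whole image. The test itself, restated as a hypothetical predicate:

// Every plane must be packed — no padding between rows.
static int CanCoalesceJ422(int width, int src_stride_argb, int dst_stride_yj,
                           int dst_stride_uj, int dst_stride_vj) {
  return src_stride_argb == width * 4 && dst_stride_yj == width &&
         dst_stride_uj * 2 == width && dst_stride_vj * 2 == width;
}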
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
@@ -2026,6 +2346,14 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
+ }
+ }
+#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
@@ -2074,270 +2402,649 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+ ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
dst_yj += dst_stride_yj;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
return 0;
}
-// Convert ARGB to AR64.
+// Convert ARGB to J400.
LIBYUV_API
-int ARGBToAR64(const uint8_t* src_argb,
+int ARGBToJ400(const uint8_t* src_argb,
int src_stride_argb,
- uint16_t* dst_ar64,
- int dst_stride_ar64,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
int width,
int height) {
int y;
- void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
- int width) = ARGBToAR64Row_C;
- if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+ if (!src_argb || !dst_yj || width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
+ if (src_stride_argb == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_ar64 = 0;
+ src_stride_argb = dst_stride_yj = 0;
}
-#if defined(HAS_ARGBTOAR64ROW_SSSE3)
+#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAR64Row = ARGBToAR64Row_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOAR64ROW_AVX2)
+#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR64Row = ARGBToAR64Row_AVX2;
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOAR64ROW_NEON)
+#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAR64Row = ARGBToAR64Row_NEON;
+ ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToAR64Row(src_argb, dst_ar64, width);
+ ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
- dst_ar64 += dst_stride_ar64;
+ dst_yj += dst_stride_yj;
}
return 0;
}
-// Convert ARGB to AB64.
+// Convert RGBA to J400.
LIBYUV_API
-int ARGBToAB64(const uint8_t* src_argb,
- int src_stride_argb,
- uint16_t* dst_ab64,
- int dst_stride_ab64,
+int RGBAToJ400(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
int width,
int height) {
int y;
- void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
- int width) = ARGBToAB64Row_C;
- if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
+ void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) =
+ RGBAToYJRow_C;
+ if (!src_rgba || !dst_yj || width <= 0 || height == 0) {
return -1;
}
- // Negative height means invert the image.
if (height < 0) {
height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
+ src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+ src_stride_rgba = -src_stride_rgba;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
+ if (src_stride_rgba == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_ab64 = 0;
+ src_stride_rgba = dst_stride_yj = 0;
}
-#if defined(HAS_ARGBTOAB64ROW_SSSE3)
+#if defined(HAS_RGBATOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBToAB64Row = ARGBToAB64Row_SSSE3;
+ RGBAToYJRow = RGBAToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOAB64ROW_AVX2)
+#if defined(HAS_RGBATOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAB64Row = ARGBToAB64Row_AVX2;
+ RGBAToYJRow = RGBAToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYJRow = RGBAToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOAB64ROW_NEON)
+#if defined(HAS_RGBATOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToAB64Row = ARGBToAB64Row_NEON;
+ RGBAToYJRow = RGBAToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_NEON;
}
}
#endif
+#if defined(HAS_RGBATOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGBAToYJRow = RGBAToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RGBAToYJRow = RGBAToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RGBAToYJRow = RGBAToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RGBAToYJRow = RGBAToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RGBAToYJRow = RGBAToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RGBATOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToYJRow = RGBAToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToAB64Row(src_argb, dst_ab64, width);
- src_argb += src_stride_argb;
- dst_ab64 += dst_stride_ab64;
+ RGBAToYJRow(src_rgba, dst_yj, width);
+ src_rgba += src_stride_rgba;
+ dst_yj += dst_stride_yj;
}
return 0;
}
-// Convert ARGB to J400.
+// Convert ABGR to J420 (JPEG full range I420).
LIBYUV_API
-int ARGBToJ400(const uint8_t* src_argb,
- int src_stride_argb,
+int ABGRToJ420(const uint8_t* src_abgr,
+ int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
int width,
int height) {
int y;
- void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
- ARGBToYJRow_C;
- if (!src_argb || !dst_yj || width <= 0 || height == 0) {
+ void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
+ ABGRToUVJRow_C;
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
+ // Negative height means invert the image.
if (height < 0) {
height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
+ }
+#if defined(HAS_ABGRTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width);
+ src_abgr += src_stride_abgr * 2;
+ dst_yj += dst_stride_yj * 2;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
+ }
+ if (height & 1) {
+ ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ }
+ return 0;
+}
+
+// Convert ABGR to J422 (JPEG full range I422).
+LIBYUV_API
+int ABGRToJ422(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ uint8_t* dst_uj,
+ int dst_stride_uj,
+ uint8_t* dst_vj,
+ int dst_stride_vj,
+ int width,
+ int height) {
+ int y;
+ void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
+ ABGRToUVJRow_C;
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
- if (src_stride_argb == width * 4 && dst_stride_yj == width) {
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width &&
+ dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
- src_stride_argb = dst_stride_yj = 0;
+ src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
-#if defined(HAS_ARGBTOYJROW_SSSE3)
+#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
+#if defined(HAS_ABGRTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
+ ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
+#if defined(HAS_ABGRTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
+ ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
-#if defined(HAS_ARGBTOYJROW_MSA)
+#if defined(HAS_ABGRTOUVJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToUVJRow = ABGRToUVJRow_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- ARGBToYJRow = ARGBToYJRow_Any_MSA;
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
+ ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_MSA;
+ ABGRToYJRow = ABGRToYJRow_MSA;
+ }
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToUVJRow = ABGRToUVJRow_MSA;
}
}
#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- ARGBToYJRow(src_argb, dst_yj, width);
- src_argb += src_stride_argb;
+ ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
+ dst_uj += dst_stride_uj;
+ dst_vj += dst_stride_vj;
}
return 0;
}
-// Convert RGBA to J400.
+// Convert ABGR to J400.
LIBYUV_API
-int RGBAToJ400(const uint8_t* src_rgba,
- int src_stride_rgba,
+int ABGRToJ400(const uint8_t* src_abgr,
+ int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height) {
int y;
- void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) =
- RGBAToYJRow_C;
- if (!src_rgba || !dst_yj || width <= 0 || height == 0) {
+ void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
+ ABGRToYJRow_C;
+ if (!src_abgr || !dst_yj || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
- src_rgba = src_rgba + (height - 1) * src_stride_rgba;
- src_stride_rgba = -src_stride_rgba;
+ src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+ src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
- if (src_stride_rgba == width * 4 && dst_stride_yj == width) {
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
- src_stride_rgba = dst_stride_yj = 0;
+ src_stride_abgr = dst_stride_yj = 0;
}
-#if defined(HAS_RGBATOYJROW_SSSE3)
+#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- RGBAToYJRow = RGBAToYJRow_Any_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_SSSE3;
+ ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_AVX2)
+#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- RGBAToYJRow = RGBAToYJRow_Any_AVX2;
+ ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- RGBAToYJRow = RGBAToYJRow_AVX2;
+ ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_NEON)
+#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- RGBAToYJRow = RGBAToYJRow_Any_NEON;
+ ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_NEON;
+ ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
-#if defined(HAS_RGBATOYJROW_MSA)
+#if defined(HAS_ABGRTOYJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- RGBAToYJRow = RGBAToYJRow_Any_MSA;
+ ABGRToYJRow = ABGRToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
- RGBAToYJRow = RGBAToYJRow_MSA;
+ ABGRToYJRow = ABGRToYJRow_MSA;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ ABGRToYJRow = ABGRToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_ABGRTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ ABGRToYJRow = ABGRToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ ABGRToYJRow = ABGRToYJRow_LASX;
}
}
#endif
+#if defined(HAS_ABGRTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ABGRToYJRow = ABGRToYJRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
- RGBAToYJRow(src_rgba, dst_yj, width);
- src_rgba += src_stride_rgba;
+ ABGRToYJRow(src_abgr, dst_yj, width);
+ src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
}
return 0;
}
+// Convert ARGB to AR64.
+LIBYUV_API
+int ARGBToAR64(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint16_t* dst_ar64,
+ int dst_stride_ar64,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
+ int width) = ARGBToAR64Row_C;
+ if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_ar64 = 0;
+ }
+#if defined(HAS_ARGBTOAR64ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToAR64Row = ARGBToAR64Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAR64Row = ARGBToAR64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAR64Row = ARGBToAR64Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAR64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToAR64Row = ARGBToAR64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToAR64Row(src_argb, dst_ar64, width);
+ src_argb += src_stride_argb;
+ dst_ar64 += dst_stride_ar64;
+ }
+ return 0;
+}
+
+// Convert ARGB to AB64.
+LIBYUV_API
+int ARGBToAB64(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint16_t* dst_ab64,
+ int dst_stride_ab64,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
+ int width) = ARGBToAB64Row_C;
+ if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_ab64 = 0;
+ }
+#if defined(HAS_ARGBTOAB64ROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBToAB64Row = ARGBToAB64Row_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAB64Row = ARGBToAB64Row_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBToAB64Row = ARGBToAB64Row_NEON;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOAB64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToAB64Row = ARGBToAB64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToAB64Row(src_argb, dst_ab64, width);
+ src_argb += src_stride_argb;
+ dst_ab64 += dst_stride_ab64;
+ }
+ return 0;
+}
+
// Enabled if 1 pass is available
-#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA)
+#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
+ defined(HAS_RAWTOYJROW_RVV)
#define HAS_RAWTOYJROW
#endif
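The guard above selects between two strategies for RAWToJNV21; a sketch of the control flow (the real loop bodies appear further down in this hunk):

#if defined(HAS_RAWTOYJROW)
// One pass: RAWToYJRow / RAWToUVJRow consume RAW rows directly.
#else
// Two passes: RAWToARGBRow expands each row into a temp ARGB buffer,
// then ARGBToYJRow / ARGBToUVJRow run on that buffer — an extra
// store/load in exchange for reusing the ARGB kernels on platforms
// without a direct RAW path.
#endif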
@@ -2355,7 +3062,7 @@ int RAWToJNV21(const uint8_t* src_raw,
int halfwidth = (width + 1) >> 1;
#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
@@ -2363,12 +3070,12 @@ int RAWToJNV21(const uint8_t* src_raw,
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
- uint8_t* dst_u, uint8_t* dst_v, int width) =
+ uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
- void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
+ void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
@@ -2403,6 +3110,27 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTOYJROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ RAWToYJRow = RAWToYJRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ RAWToYJRow = RAWToYJRow_LSX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ RAWToYJRow = RAWToYJRow_Any_LASX;
+ if (IS_ALIGNED(width, 32)) {
+ RAWToYJRow = RAWToYJRow_LASX;
+ }
+ }
+#endif
+#if defined(HAS_RAWTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToYJRow = RAWToYJRow_RVV;
+ }
+#endif
// Other platforms do intermediate conversion from RAW to ARGB.
#else // HAS_RAWTOYJROW
@@ -2459,11 +3187,19 @@ int RAWToJNV21(const uint8_t* src_raw,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
+ if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow_ = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(halfwidth, 64)) {
+ MergeUVRow_ = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
@@ -2488,29 +3224,34 @@ int RAWToJNV21(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow_ = MergeUVRow_RVV;
+ }
+#endif
{
// Allocate a row of uv.
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
+ align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2);
+ uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31);
#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYJROW)
- RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVJRow(row, kRowSize, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
+ ARGBToUVJRow(row, row_size, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
- ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+ ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif
src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2;
@@ -2518,20 +3259,20 @@ int RAWToJNV21(const uint8_t* src_raw,
}
if (height & 1) {
#if defined(HAS_RAWTOYJROW)
- RAWToUVJRow(src_raw, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ RAWToUVJRow(src_raw, 0, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
- ARGBToUVJRow(row, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
+ ARGBToUVJRow(row, 0, row_uj, row_vj, width);
+ MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RAWTOYJROW)
free_aligned_buffer_64(row);
#endif
- free_aligned_buffer_64(row_u);
+ free_aligned_buffer_64(row_uj);
}
return 0;
}
diff --git a/files/source/cpu_id.cc b/files/source/cpu_id.cc
index 56fe60e4..0c4a1581 100644
--- a/files/source/cpu_id.cc
+++ b/files/source/cpu_id.cc
@@ -40,7 +40,6 @@ extern "C" {
// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;
-// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
@@ -108,14 +107,14 @@ void CpuId(int eax, int ecx, int* cpu_info) {
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
-#if defined(_M_IX86) && (_MSC_VER < 1900)
+#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int GetXCR0() {
+static int GetXCR0() {
int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
@@ -129,7 +128,7 @@ int GetXCR0() {
#define GetXCR0() 0
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
-#if defined(_M_IX86) && (_MSC_VER < 1900)
+#if defined(_M_IX86) && defined(_MSC_VER) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
@@ -137,13 +136,14 @@ int GetXCR0() {
// For Arm, but public to allow testing on any CPU
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];
- FILE* f = fopen(cpuinfo_name, "r");
+ FILE* f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume Neon if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return kCpuHasNEON;
}
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "Features", 8) == 0) {
char* p = strstr(cpuinfo_line, " neon");
if (p && (p[5] == ' ' || p[5] == '\n')) {
@@ -162,17 +162,90 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
return 0;
}
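The fopen mode change from "r" to "re" in this file requests O_CLOEXEC via the 'e' flag, a glibc/bionic extension, so the descriptor cannot leak across exec; these /proc/cpuinfo readers only run on Linux, where the flag is honored. A hypothetical sanity check:

#include <fcntl.h>
#include <stdio.h>
// Returns 1 if the stream's descriptor carries FD_CLOEXEC, 0 if not,
// -1 if the file could not be opened.
static int HasCloexec(const char* path) {
  FILE* f = fopen(path, "re");
  int flags;
  if (!f) return -1;
  flags = fcntl(fileno(f), F_GETFD);
  fclose(f);
  return flags != -1 && (flags & FD_CLOEXEC) != 0;
}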
-// TODO(fbarchard): Consider read_msa_ir().
+LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
+ char cpuinfo_line[512];
+ int flag = 0;
+ FILE* f = fopen(cpuinfo_name, "re");
+ if (!f) {
+#if defined(__riscv_vector)
+ // Assume RVV if /proc/cpuinfo is unavailable.
+ // This will occur for Chrome sandbox for Pepper or Render process.
+ return kCpuHasRVV;
+#else
+ return 0;
+#endif
+ }
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
+ if (memcmp(cpuinfo_line, "isa", 3) == 0) {
+ // ISA string must begin with rv64{i,e,g} for a 64-bit processor.
+ char* isa = strstr(cpuinfo_line, "rv64");
+ if (isa) {
+ size_t isa_len = strlen(isa);
+ char* extensions;
+ size_t extensions_len = 0;
+ size_t std_isa_len;
+ // Remove the newline character at the end of the string.
+ if (isa[isa_len - 1] == '\n') {
+ isa[--isa_len] = '\0';
+ }
+ // Require at least the 5 prefix characters, e.g. "rv64g".
+ if (isa_len < 5) {
+ fclose(f);
+ return 0;
+ }
+ // Skip {i,e,g} canonical checking.
+ // Skip rvxxx
+ isa += 5;
+ // Find the very first occurrence of 's', 'x' or 'z'.
+ // To detect multi-letter standard, non-standard, and
+ // supervisor-level extensions.
+ extensions = strpbrk(isa, "zxs");
+ if (extensions) {
+ // Multi-letter extensions are separated by a single underscore,
+ // as described in RISC-V User-Level ISA V2.2.
+ char* ext = strtok(extensions, "_");
+ extensions_len = strlen(extensions);
+ while (ext) {
+ // Search for the ZVFH (Vector FP16) extension.
+ if (!strcmp(ext, "zvfh")) {
+ flag |= kCpuHasRVVZVFH;
+ }
+ ext = strtok(NULL, "_");
+ }
+ }
+ std_isa_len = isa_len - extensions_len - 5;
+ // Detect the v in the standard single-letter extensions.
+ if (memchr(isa, 'v', std_isa_len)) {
+ // RVV implies the F extension.
+ flag |= kCpuHasRVV;
+ }
+ }
+ }
+#if defined(__riscv_vector)
+ // Assume RVV if /proc/cpuinfo is from an x86 host running QEMU.
+ else if ((memcmp(cpuinfo_line, "vendor_id\t: GenuineIntel", 24) == 0) ||
+ (memcmp(cpuinfo_line, "vendor_id\t: AuthenticAMD", 24) == 0)) {
+ fclose(f);
+ return kCpuHasRVV;
+ }
+#endif
+ }
+ fclose(f);
+ return flag;
+}
+
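A hypothetical self-test for the parser above, assuming libyuv/cpu_id.h declares RiscvCpuCaps and the kCpuHas* bits (run on a RISC-V Linux host):

#include <stdio.h>
#include "libyuv/cpu_id.h"
int main(void) {
  int caps = RiscvCpuCaps("/proc/cpuinfo");
  printf("RVV:  %s\n", (caps & kCpuHasRVV) ? "yes" : "no");
  printf("ZVFH: %s\n", (caps & kCpuHasRVVZVFH) ? "yes" : "no");
  return 0;
}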
LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
char cpuinfo_line[512];
- int flag = 0x0;
- FILE* f = fopen(cpuinfo_name, "r");
+ int flag = 0;
+ FILE* f = fopen(cpuinfo_name, "re");
if (!f) {
// Assume nothing if /proc/cpuinfo is unavailable.
// This will occur for Chrome sandbox for Pepper or Render process.
return 0;
}
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+ memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
+ while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
// Workaround early kernel without MSA in ASEs line.
if (strstr(cpuinfo_line, "Loongson-2K")) {
@@ -191,14 +264,13 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
return flag;
}
-// TODO(fbarchard): Consider read_loongarch_ir().
#define LOONGARCH_CFG2 0x2
#define LOONGARCH_CFG2_LSX (1 << 6)
#define LOONGARCH_CFG2_LASX (1 << 7)
#if defined(__loongarch__)
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
- int flag = 0x0;
+ int flag = 0;
uint32_t cfg2 = 0;
__asm__ volatile("cpucfg %0, %1 \n\t" : "+&r"(cfg2) : "r"(LOONGARCH_CFG2));
@@ -277,6 +349,10 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#endif
cpu_info |= kCpuHasARM;
#endif // __arm__
+#if defined(__riscv) && defined(__linux__)
+ cpu_info = RiscvCpuCaps("/proc/cpuinfo");
+ cpu_info |= kCpuHasRISCV;
+#endif // __riscv
cpu_info |= kCpuInitialized;
return cpu_info;
}
diff --git a/files/source/mjpeg_decoder.cc b/files/source/mjpeg_decoder.cc
index 4ccf00a3..0141da8a 100644
--- a/files/source/mjpeg_decoder.cc
+++ b/files/source/mjpeg_decoder.cc
@@ -109,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
}
buf_.data = src;
- buf_.len = static_cast<int>(src_len);
+ buf_.len = (int)src_len;
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@@ -428,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
- size_t bytes = static_cast<size_t>(num_bytes);
+ size_t bytes = (size_t)num_bytes;
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
diff --git a/files/source/planar_functions.cc b/files/source/planar_functions.cc
index 169d4a8f..d115a2a1 100644
--- a/files/source/planar_functions.cc
+++ b/files/source/planar_functions.cc
@@ -75,6 +75,11 @@ void CopyPlane(const uint8_t* src_y,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Copy plane
for (y = 0; y < height; ++y) {
@@ -162,7 +167,7 @@ void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
- int scale, // 16384 for 10 bits
+ int scale, // 1024 for 10 bits
int width,
int height) {
int y;
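On the corrected scale of 1024 for 10 bits: if the row kernel treats scale as 8.8 fixed point (an assumption about the kernel, not something this diff shows), 1024/256 = 4, which stretches the 8 bit range [0,255] to the 10 bit range [0,1020]:

#include <stdint.h>
// (v * 1024) >> 8 == v << 2: the 8-to-10-bit widen implied by scale 1024.
static uint16_t Widen8To10(uint8_t v) {
  return (uint16_t)((v * 1024) >> 8);
}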
@@ -333,6 +338,45 @@ int I210Copy(const uint16_t* src_y,
return 0;
}
+// Copy I410.
+LIBYUV_API
+int I410Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ if (dst_y) {
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+}
+
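A usage sketch for the new I410Copy. I410 is 4:4:4 with 10 bit samples in uint16_t, so all three planes are full width x height; the packed strides and contiguous layout below are illustrative, and the declaration is assumed to land in planar_functions.h alongside I210Copy:

    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    int CopyI410Frame(const uint16_t* src, uint16_t* dst, int width, int height) {
      const size_t plane = (size_t)width * height;  // U and V are full size
      return libyuv::I410Copy(src, width, src + plane, width, src + 2 * plane, width,
                              dst, width, dst + plane, width, dst + 2 * plane, width,
                              width, height);
    }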
// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@@ -385,6 +429,7 @@ int I420ToI400(const uint8_t* src_y,
}
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -418,6 +463,7 @@ int NV12Copy(const uint8_t* src_y,
}
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@@ -504,6 +550,11 @@ void SplitUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of UV.
@@ -553,11 +604,19 @@ void MergeUVPlane(const uint8_t* src_u,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
+ if (IS_ALIGNED(width, 16)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(width, 32)) {
+ MergeUVRow = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
@@ -582,6 +641,11 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow = MergeUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of UV.
@@ -687,7 +751,7 @@ void MergeUVPlane_16(const uint16_t* src_u,
#if defined(HAS_MERGEUVROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
+ if (IS_ALIGNED(width, 8)) {
MergeUVRow_16 = MergeUVRow_16_AVX2;
}
}
@@ -911,31 +975,31 @@ int NV21ToNV12(const uint8_t* src_y,
return 0;
}
+// Test if tile_height is a power of 2 (16 or 32)
+#define IS_POWEROFTWO(x) (!((x) & ((x)-1)))
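The test relies on x & (x - 1) clearing the lowest set bit, which yields zero exactly for powers of two. Note the expression is also true for 0; the MM21/MT2T callers only pass 16 or 32, so that degenerate case is moot here. A quick self-check:

    #include <assert.h>

    #define IS_POWEROFTWO(x) (!((x) & ((x)-1)))

    int main() {
      assert(IS_POWEROFTWO(16) && IS_POWEROFTWO(32));
      assert(!IS_POWEROFTWO(17) && !IS_POWEROFTWO(24));
      assert(IS_POWEROFTWO(0));  // degenerate case the macro lets through
      return 0;
    }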
+
// Detile a plane of data
// tile width is assumed to be 16.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
// TODO: More detile row functions.
-
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height) {
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
const ptrdiff_t src_tile_stride = 16 * tile_height;
int y;
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
int width) = DetileRow_C;
- assert(src_stride_y >= 0);
- assert(tile_height > 0);
- assert(src_stride_y > 0);
-
- if (width <= 0 || height == 0) {
- return;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
}
+
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -970,6 +1034,72 @@ void DetilePlane(const uint8_t* src_y,
src_y = src_y - src_tile_stride + src_stride_y * tile_height;
}
}
+ return 0;
+}
+
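DetilePlane now reports bad arguments with -1 instead of asserting. A usage sketch for linearizing an MM21 luma plane; the function and its signature are as added above, while the stride choice (width bytes per row) is illustrative:

    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    // MM21 luma is stored in 16x32 tiles; src_stride is bytes per row of
    // the frame, ignoring tiling, and must cover a whole row of tiles.
    int LinearizeMM21Luma(const uint8_t* tiled_y, uint8_t* linear_y,
                          int width, int height) {
      return libyuv::DetilePlane(tiled_y, width, linear_y, width,
                                 width, height, /*tile_height=*/32);
    }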
+// Convert a plane of 16 bit pixels, stored in tiles of 16 x H, to linear.
+// tile width is assumed to be 16.
+// tile_height is 16 or 32 for MT2T.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
+ const ptrdiff_t src_tile_stride = 16 * tile_height;
+ int y;
+ void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
+ uint16_t* dst, int width) = DetileRow_16_C;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+#if defined(HAS_DETILEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ DetileRow_16 = DetileRow_16_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ DetileRow_16 = DetileRow_16_Any_AVX;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_AVX;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DetileRow_16 = DetileRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_NEON;
+ }
+ }
+#endif
+
+ // Detile plane
+ for (y = 0; y < height; ++y) {
+ DetileRow_16(src_y, src_tile_stride, dst_y, width);
+ dst_y += dst_stride_y;
+ src_y += 16;
+ // Advance to next row of tiles.
+ if ((y & (tile_height - 1)) == (tile_height - 1)) {
+ src_y = src_y - src_tile_stride + src_stride_y * tile_height;
+ }
+ }
+ return 0;
}
LIBYUV_API
@@ -1033,6 +1163,74 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
}
}
+LIBYUV_API
+void DetileToYUY2(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ uint8_t* dst_yuy2,
+ int dst_stride_yuy2,
+ int width,
+ int height,
+ int tile_height) {
+ const ptrdiff_t src_y_tile_stride = 16 * tile_height;
+ const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
+ int y;
+ void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
+  assert(src_stride_y > 0);
+  assert(src_stride_uv > 0);
+ assert(tile_height > 0);
+
+ if (width <= 0 || height == 0 || tile_height <= 0) {
+ return;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+ dst_stride_yuy2 = -dst_stride_yuy2;
+ }
+
+#if defined(HAS_DETILETOYUY2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DetileToYUY2 = DetileToYUY2_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DetileToYUY2 = DetileToYUY2_NEON;
+ }
+ }
+#endif
+
+#if defined(HAS_DETILETOYUY2_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ DetileToYUY2 = DetileToYUY2_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ DetileToYUY2 = DetileToYUY2_SSE2;
+ }
+ }
+#endif
+
+ // Detile plane
+ for (y = 0; y < height; ++y) {
+ DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2,
+ width);
+ dst_yuy2 += dst_stride_yuy2;
+ src_y += 16;
+
+ if (y & 0x1)
+ src_uv += 16;
+
+ // Advance to next row of tiles.
+ if ((y & (tile_height - 1)) == (tile_height - 1)) {
+ src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
+ src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
+ }
+ }
+}
+
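The UV pointer above advances only on odd rows because NV12 chroma is half height: two Y rows share one UV row. Per row, the kernel interleaves Y and UV into YUY2 (Y0 U Y1 V order) and hops a whole tile every 16 pixels. A portable sketch of that per-row behavior, written from the call contract above rather than from the shipped row_common.cc kernel, and assuming width is a multiple of the 16-pixel tile width:

    #include <stddef.h>
    #include <stdint.h>

    static void DetileToYUY2RowSketch(const uint8_t* src_y, ptrdiff_t y_tile_stride,
                                      const uint8_t* src_uv, ptrdiff_t uv_tile_stride,
                                      uint8_t* dst_yuy2, int width) {
      for (int x = 0; x < width; x += 2) {
        dst_yuy2[0] = src_y[0];   // Y0
        dst_yuy2[1] = src_uv[0];  // U
        dst_yuy2[2] = src_y[1];   // Y1
        dst_yuy2[3] = src_uv[1];  // V
        src_y += 2;
        src_uv += 2;
        dst_yuy2 += 4;
        if ((x & 15) == 14) {            // crossed a 16-pixel tile column
          src_y += y_tile_stride - 16;   // jump to the same row of the next tile
          src_uv += uv_tile_stride - 16;
        }
      }
    }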
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
@@ -1085,6 +1283,11 @@ void SplitRGBPlane(const uint8_t* src_rgb,
}
}
#endif
+#if defined(HAS_SPLITRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitRGBRow = SplitRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of RGB.
@@ -1144,6 +1347,11 @@ void MergeRGBPlane(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGERGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeRGBRow = MergeRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of RGB.
@@ -1156,18 +1364,18 @@ void MergeRGBPlane(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneAlpha(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- uint8_t* dst_a,
- int dst_stride_a,
- int width,
- int height) {
+static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) =
@@ -1175,6 +1383,9 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
width *= height;
@@ -1215,6 +1426,11 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitARGBRow = SplitARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
@@ -1227,21 +1443,24 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneOpaque(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- int width,
- int height) {
+static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
int y;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C;
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width) {
width *= height;
@@ -1281,6 +1500,11 @@ void SplitARGBPlaneOpaque(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitXRGBRow = SplitXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
@@ -1328,18 +1552,18 @@ void SplitARGBPlane(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneAlpha(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- const uint8_t* src_a,
- int src_stride_a,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneAlpha(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a,
@@ -1347,6 +1571,9 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_argb == width * 4) {
width *= height;
@@ -1378,6 +1605,11 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeARGBRow = MergeARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
@@ -1390,16 +1622,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneOpaque(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneOpaque(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) =
@@ -1407,6 +1639,9 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_argb == width * 4) {
width *= height;
@@ -1437,6 +1672,11 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeXRGBRow = MergeXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
@@ -1888,6 +2128,16 @@ int YUY2ToI422(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -1984,6 +2234,16 @@ int UYVYToI422(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
UYVYToYRow = UYVYToYRow_Any_LASX;
@@ -2131,6 +2391,14 @@ int UYVYToY(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
UYVYToYRow(src_uyvy, dst_y, width);
@@ -2189,6 +2457,14 @@ void MirrorPlane(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -2255,6 +2531,14 @@ void MirrorUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_MIRRORUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorUVRow = MirrorUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ MirrorUVRow = MirrorUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorUVRow = MirrorUVRow_Any_LASX;
@@ -2427,6 +2711,14 @@ int ARGBMirror(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -2809,6 +3101,14 @@ int ARGBMultiply(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBMULTIPLYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMULTIPLYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
@@ -2894,6 +3194,14 @@ int ARGBAdd(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBADDROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAddRow = ARGBAddRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAddRow = ARGBAddRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBADDROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAddRow = ARGBAddRow_Any_LASX;
@@ -2974,6 +3282,14 @@ int ARGBSubtract(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBSUBTRACTROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSubtractRow = ARGBSubtractRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSUBTRACTROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
@@ -3051,6 +3367,11 @@ int RAWToRGB24(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToRGB24Row = RAWToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGB24Row(src_raw, dst_rgb24, width);
@@ -3060,6 +3381,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
+// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@@ -3067,7 +3389,7 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value) {
int y;
- void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
+ void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
if (width <= 0 || height == 0) {
return;
@@ -3120,7 +3442,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
- SetRow(dst_y, value, width);
+ SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}
@@ -3168,7 +3490,7 @@ int ARGBRect(uint8_t* dst_argb,
int height,
uint32_t value) {
int y;
- void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+ void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3293,6 +3615,14 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
@@ -3301,6 +3631,11 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBAttenuateRow(src_argb, dst_argb, width);
@@ -3401,6 +3736,11 @@ int ARGBGrayTo(const uint8_t* src_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3451,6 +3791,11 @@ int ARGBGray(uint8_t* dst_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3473,7 +3818,7 @@ int ARGBSepia(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+ void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3499,6 +3844,11 @@ int ARGBSepia(uint8_t* dst_argb,
ARGBSepiaRow = ARGBSepiaRow_MSA;
}
#endif
+#if defined(HAS_ARGBSEPIAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSEPIAROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBSepiaRow = ARGBSepiaRow_LASX;
@@ -3616,7 +3966,7 @@ int ARGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3652,7 +4002,7 @@ int RGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3697,7 +4047,7 @@ int ARGBQuantize(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
+ void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
@@ -3924,6 +4274,11 @@ int ARGBShade(const uint8_t* src_argb,
ARGBShadeRow = ARGBShadeRow_MSA;
}
#endif
+#if defined(HAS_ARGBSHADEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) {
+ ARGBShadeRow = ARGBShadeRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSHADEROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
ARGBShadeRow = ARGBShadeRow_LASX;
@@ -3950,7 +4305,7 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4008,6 +4363,11 @@ int InterpolatePlane(const uint8_t* src0,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
InterpolateRow(dst, src0, src1 - src0, width, interpolation);
@@ -4030,7 +4390,7 @@ int InterpolatePlane_16(const uint16_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4213,6 +4573,14 @@ int ARGBShuffle(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_ARGBSHUFFLEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBShuffleRow = ARGBShuffleRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSHUFFLEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
@@ -4444,6 +4812,11 @@ static int ARGBSobelize(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
@@ -4477,16 +4850,16 @@ static int ARGBSobelize(const uint8_t* src_argb,
#endif
{
// 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 31) & ~31;
- align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
+ const int row_size = (width + kEdge + 31) & ~31;
+ align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
uint8_t* row_sobelx = rows;
- uint8_t* row_sobely = rows + kRowSize;
- uint8_t* row_y = rows + kRowSize * 2;
+ uint8_t* row_sobely = rows + row_size;
+ uint8_t* row_y = rows + row_size * 2;
// Convert first row.
uint8_t* row_y0 = row_y + kEdge;
- uint8_t* row_y1 = row_y0 + kRowSize;
- uint8_t* row_y2 = row_y1 + kRowSize;
+ uint8_t* row_y1 = row_y0 + row_size;
+ uint8_t* row_y2 = row_y1 + row_size;
ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
@@ -5027,9 +5400,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
return 0;
}
-// TODO(fbarchard): Consider if width is even Y channel can be split
-// directly. A SplitUVRow_Odd function could copy the remaining chroma.
-
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
@@ -5040,13 +5410,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
int width,
int height) {
int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
- int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
-
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
+ uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
@@ -5057,109 +5424,91 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
-#if defined(HAS_SPLITUVROW_SSE2)
+#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_AVX2)
+#if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_NEON)
+#if defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
+ YUY2ToYRow = YUY2ToYRow_NEON;
}
}
#endif
-#if defined(HAS_SPLITUVROW_MSA)
+#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- SplitUVRow = SplitUVRow_Any_MSA;
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_MSA;
+ YUY2ToYRow = YUY2ToYRow_MSA;
}
}
#endif
-#if defined(HAS_SPLITUVROW_LSX)
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
- SplitUVRow = SplitUVRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_LSX;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_LSX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
+#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_LASX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
+
+#if defined(HAS_YUY2TONVUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- InterpolateRow = InterpolateRow_Any_MSA;
+#if defined(HAS_YUY2TONVUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_MSA;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- InterpolateRow = InterpolateRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_LSX;
+#if defined(HAS_YUY2TONVUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
}
}
#endif
- {
- int awidth = halfwidth * 2;
- // row of y and 2 rows of uv
- align_buffer_64(rows, awidth * 3);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
- memcpy(dst_y, rows, width);
- SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
- memcpy(dst_y + dst_stride_y, rows, width);
- InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, dst_uv, awidth);
- memcpy(dst_y, rows, width);
- }
- free_aligned_buffer_64(rows);
+ for (y = 0; y < height - 1; y += 2) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+ YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
}
return 0;
}
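The rewrite drops the SplitUV plus InterpolateRow staging buffer: YUY2ToYRow extracts luma directly and YUY2ToNVUVRow averages the chroma of a row pair straight into the NV12 UV plane. Note the odd-height tail passes a stride of 0 so the last row averages with itself. A sketch of what the NVUV kernel computes, assuming the usual rounded two-row average:

    #include <stdint.h>

    // Average the chroma of two YUY2 rows into one interleaved UV row.
    static void YUY2ToNVUVRowSketch(const uint8_t* src_yuy2, int stride_yuy2,
                                    uint8_t* dst_uv, int width) {
      const uint8_t* next = src_yuy2 + stride_yuy2;  // stride 0: row pairs with itself
      for (int x = 0; x < width; x += 2) {
        dst_uv[0] = (uint8_t)((src_yuy2[1] + next[1] + 1) >> 1);  // U
        dst_uv[1] = (uint8_t)((src_yuy2[3] + next[3] + 1) >> 1);  // V
        src_yuy2 += 4;
        next += 4;
        dst_uv += 2;
      }
    }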
@@ -5177,7 +5526,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
@@ -5231,6 +5580,12 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
+
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -5271,6 +5626,11 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
{
int awidth = halfwidth * 2;
@@ -5336,6 +5696,7 @@ void HalfMergeUVPlane(const uint8_t* src_u,
HalfMergeUVRow = HalfMergeUVRow_AVX2;
}
#endif
+
for (y = 0; y < height - 1; y += 2) {
// Merge a row of U and V into a row of UV.
HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
diff --git a/files/source/rotate.cc b/files/source/rotate.cc
index f1e83cbd..8d3978c7 100644
--- a/files/source/rotate.cc
+++ b/files/source/rotate.cc
@@ -138,7 +138,7 @@ void RotatePlane180(const uint8_t* src,
int dst_stride,
int width,
int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
+ // Swap top and bottom row and mirror the content. Uses a temporary row.
align_buffer_64(row, width);
const uint8_t* src_bot = src + src_stride * (height - 1);
uint8_t* dst_bot = dst + dst_stride * (height - 1);
@@ -178,6 +178,14 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -206,12 +214,17 @@ void RotatePlane180(const uint8_t* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
- CopyRow(src, row, width); // Copy first row into buffer
- MirrorRow(src_bot, dst, width); // Mirror last row into first row
- MirrorRow(row, dst_bot, width); // Mirror buffer into last row
+ CopyRow(src, row, width); // Copy top row into buffer
+ MirrorRow(src_bot, dst, width); // Mirror bottom row into top row
+ MirrorRow(row, dst_bot, width); // Mirror buffer into bottom row
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
@@ -477,6 +490,120 @@ int RotatePlane(const uint8_t* src,
}
LIBYUV_API
+void TransposePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i = height;
+ // Work across the source in 8x8 tiles
+ while (i >= 8) {
+ TransposeWx8_16_C(src, src_stride, dst, dst_stride, width);
+ src += 8 * src_stride; // Go down 8 rows.
+ dst += 8; // Move over 8 columns.
+ i -= 8;
+ }
+
+ if (i > 0) {
+ TransposeWxH_16_C(src, src_stride, dst, dst_stride, width, i);
+ }
+}
+
+static void RotatePlane90_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 90 is a transpose with the source read
+ // from bottom to top. So set the source pointer to the end
+ // of the buffer and flip the sign of the source stride.
+ src += src_stride * (height - 1);
+ src_stride = -src_stride;
+ TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
+}
+
+static void RotatePlane270_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Rotate by 270 is a transpose with the destination written
+ // from bottom to top. So set the destination pointer to the end
+ // of the buffer and flip the sign of the destination stride.
+ dst += dst_stride * (width - 1);
+ dst_stride = -dst_stride;
+ TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
+}
+
+static void RotatePlane180_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ // Swap top and bottom row and mirror the content. Uses a temporary row.
+ align_buffer_64_16(row, width);
+ const uint16_t* src_bot = src + src_stride * (height - 1);
+ uint16_t* dst_bot = dst + dst_stride * (height - 1);
+ int half_height = (height + 1) >> 1;
+ int y;
+
+ // Odd height will harmlessly mirror the middle row twice.
+ for (y = 0; y < half_height; ++y) {
+ CopyRow_16_C(src, row, width); // Copy top row into buffer
+ MirrorRow_16_C(src_bot, dst, width); // Mirror bottom row into top row
+ MirrorRow_16_C(row, dst_bot, width); // Mirror buffer into bottom row
+ src += src_stride;
+ dst += dst_stride;
+ src_bot -= src_stride;
+ dst_bot -= dst_stride;
+ }
+ free_aligned_buffer_64_16(row);
+}
+
+LIBYUV_API
+int RotatePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src || width <= 0 || height == 0 || !dst) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src = src + (height - 1) * src_stride;
+ src_stride = -src_stride;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ CopyPlane_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate90:
+ RotatePlane90_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src, src_stride, dst, dst_stride, width, height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
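A usage sketch for the new 16 bit plane rotation; after a 90 degree rotation the destination is height x width, so the destination stride below is the source height (strides are in uint16_t units). Assumes the declaration added to include/libyuv/rotate.h by this change:

    #include <stdint.h>
    #include "libyuv/rotate.h"

    int Rotate10BitPlane90(const uint16_t* src, uint16_t* dst,
                           int width, int height) {
      // dst holds a height x width image after rotation.
      return libyuv::RotatePlane_16(src, width, dst, height,
                                    width, height, libyuv::kRotate90);
    }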
+LIBYUV_API
int I420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
@@ -544,6 +671,8 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
+// I422 has half width x full height UV planes, so rotating by 90 or 270
+// requires scaling to maintain 422 subsampling.
LIBYUV_API
int I422Rotate(const uint8_t* src_y,
int src_stride_y,
@@ -579,31 +708,42 @@ int I422Rotate(const uint8_t* src_y,
switch (mode) {
case kRotate0:
- // copy frame
+ // Copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
return 0;
+
+ // Note on temporary Y plane for UV.
+  // The UV planes are rotated first, staged in the Y destination plane:
+ // Y plane is width x height
+ // Y plane rotated is height x width
+ // UV plane is (width / 2) x height
+ // UV plane rotated is height x (width / 2)
+ // UV plane rotated+scaled is (height / 2) x width.
+ // UV plane rotated is a temporary that fits within the Y plane rotated.
+
case kRotate90:
- // We need to rotate and rescale, we use plane Y as temporal storage.
- RotatePlane90(src_u, src_stride_u, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
+ RotatePlane90(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
- RotatePlane90(src_v, src_stride_v, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
+ RotatePlane90(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
case kRotate270:
- // We need to rotate and rescale, we use plane Y as temporal storage.
- RotatePlane270(src_u, src_stride_u, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_u, halfheight,
+ RotatePlane270(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
halfheight, width, kFilterBilinear);
- RotatePlane270(src_v, src_stride_v, dst_y, height, halfwidth, height);
- ScalePlane(dst_y, height, height, halfwidth, dst_v, halfheight,
+ RotatePlane270(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
halfheight, width, kFilterLinear);
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-
return 0;
case kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
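Worked numbers for the staging note above, for a 640x480 I422 frame:

    Y destination, rotated:      480 x 640   (the staging area)
    U or V source:               320 x 480
    U or V rotated:              480 x 320   fits within 480 x 640
    U or V rotated then scaled:  240 x 640   final 422 geometry

Each chroma plane is rotated into dst_y, scaled out to its own destination, and only then is the Y plane itself rotated into dst_y.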
@@ -828,6 +968,228 @@ int Android420ToI420Rotate(const uint8_t* src_y,
return -1;
}
+LIBYUV_API
+int I010Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v || dst_stride_y < 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ return I010Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
+ src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
+ dst_v, dst_stride_v, width, height);
+ case kRotate90:
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ halfheight);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ halfheight);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+// I210 has half width x full height UV planes, so rotating by 90 or 270
+// requires scaling to maintain 422 subsampling.
+LIBYUV_API
+int I210Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ int halfwidth = (width + 1) >> 1;
+ int halfheight = (height + 1) >> 1;
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // Copy frame
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
+ return 0;
+
+ // Note on temporary Y plane for UV.
+  // The UV planes are rotated first, staged in the Y destination plane:
+ // Y plane is width x height
+ // Y plane rotated is height x width
+ // UV plane is (width / 2) x height
+ // UV plane rotated is height x (width / 2)
+ // UV plane rotated+scaled is (height / 2) x width.
+ // UV plane rotated is a temporary that fits within the Y plane rotated.
+
+ case kRotate90:
+ RotatePlane90_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
+ halfheight, width, kFilterBilinear);
+ RotatePlane90_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
+ halfheight, width, kFilterLinear);
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_u, src_stride_u, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_u, dst_stride_u,
+ halfheight, width, kFilterBilinear);
+ RotatePlane270_16(src_v, src_stride_v, dst_y, dst_stride_y, halfwidth,
+ height);
+ ScalePlane_16(dst_y, dst_stride_y, height, halfwidth, dst_v, dst_stride_v,
+ halfheight, width, kFilterLinear);
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
+ height);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
+ height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+LIBYUV_API
+int I410Rotate(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode) {
+ if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
+ !dst_u || !dst_v || dst_stride_y < 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ switch (mode) {
+ case kRotate0:
+ // copy frame
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+ case kRotate90:
+ RotatePlane90_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ RotatePlane90_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+ case kRotate270:
+ RotatePlane270_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane270_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
+ height);
+ RotatePlane270_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
+ height);
+ return 0;
+ case kRotate180:
+ RotatePlane180_16(src_y, src_stride_y, dst_y, dst_stride_y, width,
+ height);
+ RotatePlane180_16(src_u, src_stride_u, dst_u, dst_stride_u, width,
+ height);
+ RotatePlane180_16(src_v, src_stride_v, dst_v, dst_stride_v, width,
+ height);
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/rotate_argb.cc b/files/source/rotate_argb.cc
index 539cf98d..c7239010 100644
--- a/files/source/rotate_argb.cc
+++ b/files/source/rotate_argb.cc
@@ -8,11 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "libyuv/rotate.h"
+#include "libyuv/rotate_argb.h"
#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */
@@ -155,6 +156,14 @@ static int ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -183,6 +192,11 @@ static int ARGBRotate180(const uint8_t* src_argb,
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
diff --git a/files/source/rotate_common.cc b/files/source/rotate_common.cc
index ff212ade..4b496d1b 100644
--- a/files/source/rotate_common.cc
+++ b/files/source/rotate_common.cc
@@ -94,12 +94,135 @@ void TransposeUVWxH_C(const uint8_t* src,
for (i = 0; i < width * 2; i += 2) {
int j;
for (j = 0; j < height; ++j) {
- dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
- dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
+ dst_a[((i >> 1) * dst_stride_a) + j] = src[i + (j * src_stride)];
+ dst_b[((i >> 1) * dst_stride_b) + j] = src[i + (j * src_stride) + 1];
}
}
}
+void TransposeWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst[0] = src[0 * src_stride];
+ dst[1] = src[1 * src_stride];
+ dst[2] = src[2 * src_stride];
+ dst[3] = src[3 * src_stride];
+ dst[4] = src[4 * src_stride];
+ dst[5] = src[5 * src_stride];
+ dst[6] = src[6 * src_stride];
+ dst[7] = src[7 * src_stride];
+ ++src;
+ dst += dst_stride;
+ }
+}
+
+void TransposeUVWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst_a,
+ int dst_stride_a,
+ uint16_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_a[0] = src[0 * src_stride + 0];
+ dst_b[0] = src[0 * src_stride + 1];
+ dst_a[1] = src[1 * src_stride + 0];
+ dst_b[1] = src[1 * src_stride + 1];
+ dst_a[2] = src[2 * src_stride + 0];
+ dst_b[2] = src[2 * src_stride + 1];
+ dst_a[3] = src[3 * src_stride + 0];
+ dst_b[3] = src[3 * src_stride + 1];
+ dst_a[4] = src[4 * src_stride + 0];
+ dst_b[4] = src[4 * src_stride + 1];
+ dst_a[5] = src[5 * src_stride + 0];
+ dst_b[5] = src[5 * src_stride + 1];
+ dst_a[6] = src[6 * src_stride + 0];
+ dst_b[6] = src[6 * src_stride + 1];
+ dst_a[7] = src[7 * src_stride + 0];
+ dst_b[7] = src[7 * src_stride + 1];
+ src += 2;
+ dst_a += dst_stride_a;
+ dst_b += dst_stride_b;
+ }
+}
+
+void TransposeWxH_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ int j;
+ for (j = 0; j < height; ++j) {
+ dst[i * dst_stride + j] = src[j * src_stride + i];
+ }
+ }
+}
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_C(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ int i;
+ for (i = 0; i < width; i += 4) {
+ uint32_t p00 = ((uint32_t*)(src))[0];
+ uint32_t p10 = ((uint32_t*)(src))[1];
+ uint32_t p20 = ((uint32_t*)(src))[2];
+ uint32_t p30 = ((uint32_t*)(src))[3];
+ uint32_t p01 = ((uint32_t*)(src1))[0];
+ uint32_t p11 = ((uint32_t*)(src1))[1];
+ uint32_t p21 = ((uint32_t*)(src1))[2];
+ uint32_t p31 = ((uint32_t*)(src1))[3];
+ uint32_t p02 = ((uint32_t*)(src2))[0];
+ uint32_t p12 = ((uint32_t*)(src2))[1];
+ uint32_t p22 = ((uint32_t*)(src2))[2];
+ uint32_t p32 = ((uint32_t*)(src2))[3];
+ uint32_t p03 = ((uint32_t*)(src3))[0];
+ uint32_t p13 = ((uint32_t*)(src3))[1];
+ uint32_t p23 = ((uint32_t*)(src3))[2];
+ uint32_t p33 = ((uint32_t*)(src3))[3];
+ ((uint32_t*)(dst))[0] = p00;
+ ((uint32_t*)(dst))[1] = p01;
+ ((uint32_t*)(dst))[2] = p02;
+ ((uint32_t*)(dst))[3] = p03;
+ ((uint32_t*)(dst1))[0] = p10;
+ ((uint32_t*)(dst1))[1] = p11;
+ ((uint32_t*)(dst1))[2] = p12;
+ ((uint32_t*)(dst1))[3] = p13;
+ ((uint32_t*)(dst2))[0] = p20;
+ ((uint32_t*)(dst2))[1] = p21;
+ ((uint32_t*)(dst2))[2] = p22;
+ ((uint32_t*)(dst2))[3] = p23;
+ ((uint32_t*)(dst3))[0] = p30;
+ ((uint32_t*)(dst3))[1] = p31;
+ ((uint32_t*)(dst3))[2] = p32;
+ ((uint32_t*)(dst3))[3] = p33;
+ src += src_stride * 4; // advance 4 rows
+ src1 += src_stride * 4;
+ src2 += src_stride * 4;
+ src3 += src_stride * 4;
+ dst += 4 * 4; // advance 4 columns
+ dst1 += 4 * 4;
+ dst2 += 4 * 4;
+ dst3 += 4 * 4;
+ }
+}
+
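Each ARGB pixel moves as a whole uint32_t in the transpose above: the source is 4 rows of width pixels, the destination is width rows of 4 pixels, and strides are in bytes. A small smoke test; the extern declaration is illustrative (the kernel is expected to be declared via rotate_row.h):

    #include <assert.h>
    #include <stdint.h>

    extern "C" void Transpose4x4_32_C(const uint8_t* src, int src_stride,
                                      uint8_t* dst, int dst_stride, int width);

    int main() {
      uint32_t src[4][4], dst[4][4];
      for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c) src[r][c] = (uint32_t)(r * 16 + c);
      // Strides are in bytes: 4 pixels * 4 bytes per pixel.
      Transpose4x4_32_C((const uint8_t*)src, 16, (uint8_t*)dst, 16, 4);
      for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c) assert(dst[r][c] == src[c][r]);
      return 0;
    }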
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/rotate_gcc.cc b/files/source/rotate_gcc.cc
index 1a3f8cbb..fd5eee05 100644
--- a/files/source/rotate_gcc.cc
+++ b/files/source/rotate_gcc.cc
@@ -365,6 +365,136 @@ void TransposeUVWx8_SSE2(const uint8_t* src,
"xmm7", "xmm8", "xmm9");
}
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
+
+#if defined(HAS_TRANSPOSE4X4_32_SSE2)
+// 4x4 32 bit values, little endian view
+// a b c d
+// e f g h
+// i j k l
+// m n o p
+
+// transpose 2x2
+// a e b f from row 0, 1
+// i m j n from row 2, 3
+// c g d h from row 0, 1
+// k o l p from row 2, 3
+
+// transpose 4x4
+// a e i m from row 0, 1
+// b f j n from row 0, 1
+// c g k o from row 2, 3
+// d h l p from row 2, 3
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_SSE2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Main loop transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // a b c d
+ "movdqu (%0,%3),%%xmm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "movdqu (%0),%%xmm2 \n" // i j k l
+ "movdqu (%0,%3),%%xmm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ // Transpose 2x2
+ "movdqa %%xmm0,%%xmm4 \n"
+ "movdqa %%xmm2,%%xmm5 \n"
+ "movdqa %%xmm0,%%xmm6 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "punpckldq %%xmm1,%%xmm4 \n" // a e b f from row 0, 1
+ "punpckldq %%xmm3,%%xmm5 \n" // i m j n from row 2, 3
+ "punpckhdq %%xmm1,%%xmm6 \n" // c g d h from row 0, 1
+ "punpckhdq %%xmm3,%%xmm7 \n" // k o l p from row 2, 3
+
+ // Transpose 4x4
+ "movdqa %%xmm4,%%xmm0 \n"
+ "movdqa %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm6,%%xmm2 \n"
+ "movdqa %%xmm6,%%xmm3 \n"
+ "punpcklqdq %%xmm5,%%xmm0 \n" // a e i m from row 0, 1
+ "punpckhqdq %%xmm5,%%xmm1 \n" // b f j n from row 0, 1
+ "punpcklqdq %%xmm7,%%xmm2 \n" // c g k o from row 2, 3
+ "punpckhqdq %%xmm7,%%xmm3 \n" // d h l p from row 2, 3
+
+ "movdqu %%xmm0,(%1) \n"
+ "lea 16(%1,%4),%1 \n" // dst += stride + 16
+ "movdqu %%xmm1,-16(%1) \n"
+ "movdqu %%xmm2,-16(%1,%4) \n"
+ "movdqu %%xmm3,-16(%1,%4,2) \n"
+ "sub %4,%1 \n"
+ "sub $0x4,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+rm"(width) // %2
+ : "r"((ptrdiff_t)(src_stride)), // %3
+ "r"((ptrdiff_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // defined(HAS_TRANSPOSE4X4_32_SSE2)
+
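For readability, the punpckldq/punpcklqdq dance above maps one-to-one onto SSE2 intrinsics. This sketch transposes a single 4x4 block of 32 bit pixels and is an aid to reading the asm, not the shipped kernel:

    #include <emmintrin.h>  // SSE2
    #include <stdint.h>

    static void Transpose4x4_32_Intrinsics(const uint8_t* src, int src_stride,
                                           uint8_t* dst, int dst_stride) {
      __m128i r0 = _mm_loadu_si128((const __m128i*)(src));                   // a b c d
      __m128i r1 = _mm_loadu_si128((const __m128i*)(src + src_stride));      // e f g h
      __m128i r2 = _mm_loadu_si128((const __m128i*)(src + 2 * src_stride));  // i j k l
      __m128i r3 = _mm_loadu_si128((const __m128i*)(src + 3 * src_stride));  // m n o p
      __m128i t0 = _mm_unpacklo_epi32(r0, r1);  // a e b f
      __m128i t1 = _mm_unpacklo_epi32(r2, r3);  // i m j n
      __m128i t2 = _mm_unpackhi_epi32(r0, r1);  // c g d h
      __m128i t3 = _mm_unpackhi_epi32(r2, r3);  // k o l p
      _mm_storeu_si128((__m128i*)(dst), _mm_unpacklo_epi64(t0, t1));                  // a e i m
      _mm_storeu_si128((__m128i*)(dst + dst_stride), _mm_unpackhi_epi64(t0, t1));     // b f j n
      _mm_storeu_si128((__m128i*)(dst + 2 * dst_stride), _mm_unpacklo_epi64(t2, t3)); // c g k o
      _mm_storeu_si128((__m128i*)(dst + 3 * dst_stride), _mm_unpackhi_epi64(t2, t3)); // d h l p
    }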
+#if defined(HAS_TRANSPOSE4X4_32_AVX2)
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_AVX2(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ asm volatile(
+ // Main loop transpose 2 blocks of 4x4. Read a column, write a row.
+ "1: \n"
+ "vmovdqu (%0),%%xmm0 \n" // a b c d
+ "vmovdqu (%0,%3),%%xmm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "vmovdqu (%0),%%xmm2 \n" // i j k l
+ "vmovdqu (%0,%3),%%xmm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ "vinserti128 $1,(%0),%%ymm0,%%ymm0 \n" // a b c d
+ "vinserti128 $1,(%0,%3),%%ymm1,%%ymm1 \n" // e f g h
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+ "vinserti128 $1,(%0),%%ymm2,%%ymm2 \n" // i j k l
+ "vinserti128 $1,(%0,%3),%%ymm3,%%ymm3 \n" // m n o p
+ "lea (%0,%3,2),%0 \n" // src += stride * 2
+
+ // Transpose 2x2
+ "vpunpckldq %%ymm1,%%ymm0,%%ymm4 \n" // a e b f from row 0, 1
+ "vpunpckldq %%ymm3,%%ymm2,%%ymm5 \n" // i m j n from row 2, 3
+ "vpunpckhdq %%ymm1,%%ymm0,%%ymm6 \n" // c g d h from row 0, 1
+ "vpunpckhdq %%ymm3,%%ymm2,%%ymm7 \n" // k o l p from row 2, 3
+
+ // Transpose 4x4
+ "vpunpcklqdq %%ymm5,%%ymm4,%%ymm0 \n" // a e i m from row 0, 1
+ "vpunpckhqdq %%ymm5,%%ymm4,%%ymm1 \n" // b f j n from row 0, 1
+ "vpunpcklqdq %%ymm7,%%ymm6,%%ymm2 \n" // c g k o from row 2, 3
+ "vpunpckhqdq %%ymm7,%%ymm6,%%ymm3 \n" // d h l p from row 2, 3
+
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 32(%1,%4),%1 \n" // dst += stride + 32
+ "vmovdqu %%ymm1,-32(%1) \n"
+ "vmovdqu %%ymm2,-32(%1,%4) \n"
+ "vmovdqu %%ymm3,-32(%1,%4,2) \n"
+ "sub %4,%1 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+rm"(width) // %2
+ : "r"((ptrdiff_t)(src_stride)), // %3
+ "r"((ptrdiff_t)(dst_stride)) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // defined(HAS_TRANSPOSE4X4_32_AVX2)
+
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
diff --git a/files/source/rotate_mmi.cc b/files/source/rotate_mmi.cc
deleted file mode 100644
index f8de6083..00000000
--- a/files/source/rotate_mmi.cc
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate_row.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-void TransposeWx8_MMI(const uint8_t* src,
- int src_stride,
- uint8_t* dst,
- int dst_stride,
- int width) {
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
- uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
- uint8_t* src_tmp = nullptr;
-
- __asm__ volatile(
- "1: \n\t"
- "ldc1 %[tmp12], 0x00(%[src]) \n\t"
- "dadd %[src_tmp], %[src], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (00 10 01 11 02 12 03 13) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (04 14 05 15 06 16 07 17) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (20 30 21 31 22 32 23 33) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (24 34 25 35 26 36 27 37) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp4 = (00 10 20 30 01 11 21 31) */
- "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
- /* tmp5 = (02 12 22 32 03 13 23 33) */
- "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
- /* tmp6 = (04 14 24 34 05 15 25 35) */
- "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
- /* tmp7 = (06 16 26 36 07 17 27 37) */
- "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (40 50 41 51 42 52 43 53) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (44 54 45 55 46 56 47 57) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (60 70 61 71 62 72 63 73) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (64 74 65 75 66 76 67 77) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp8 = (40 50 60 70 41 51 61 71) */
- "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
- /* tmp9 = (42 52 62 72 43 53 63 73) */
- "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
- /* tmp10 = (44 54 64 74 45 55 65 75) */
- "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
- /* tmp11 = (46 56 66 76 47 57 67 77) */
- "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"
-
- /* tmp0 = (00 10 20 30 40 50 60 70) */
- "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
- /* tmp1 = (01 11 21 31 41 51 61 71) */
- "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (02 12 22 32 42 52 62 72) */
- "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
- /* tmp1 = (03 13 23 33 43 53 63 73) */
- "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (04 14 24 34 44 54 64 74) */
- "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
- /* tmp1 = (05 15 25 35 45 55 65 75) */
- "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- /* tmp0 = (06 16 26 36 46 56 66 76) */
- "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
- /* tmp1 = (07 17 27 37 47 57 67 77) */
- "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"
-
- "dadd %[dst], %[dst], %[dst_stride] \n\t"
- "daddi %[src], %[src], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
- [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
- [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
- [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst),
- [src_tmp] "+&r"(src_tmp)
- : [src] "r"(src), [width] "r"(width), [src_stride] "r"(src_stride),
- [dst_stride] "r"(dst_stride)
- : "memory");
-}
-
-void TransposeUVWx8_MMI(const uint8_t* src,
- int src_stride,
- uint8_t* dst_a,
- int dst_stride_a,
- uint8_t* dst_b,
- int dst_stride_b,
- int width) {
- uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
- uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
- uint8_t* src_tmp = nullptr;
-
- __asm__ volatile(
- "1: \n\t"
- /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */
- "ldc1 %[tmp12], 0x00(%[src]) \n\t"
- "dadd %[src_tmp], %[src], %[src_stride] \n\t"
- /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */
- "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
- /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */
- "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
- /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */
- "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
- /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */
- "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */
- "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
- /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */
- "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"
-
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */
- "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
- /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */
- "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
- "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"
-
- /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */
- "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
- /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */
- "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"
-
- /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */
- "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
- /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */
- "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
- /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */
- "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
- /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */
- "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"
-
- /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */
- "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
- /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */
- "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */
- "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
- /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */
- "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */
- "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
- /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */
- "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */
- "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
- /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */
- "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
- "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
- "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"
-
- "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
- "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
- "daddiu %[src], %[src], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
- [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
- [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
- [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a),
- [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp)
- : [src] "r"(src), [width] "r"(width), [dst_stride_a] "r"(dst_stride_a),
- [dst_stride_b] "r"(dst_stride_b), [src_stride] "r"(src_stride)
- : "memory");
-}
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/rotate_neon.cc b/files/source/rotate_neon.cc
index 844df2bf..569a7318 100644
--- a/files/source/rotate_neon.cc
+++ b/files/source/rotate_neon.cc
@@ -410,6 +410,46 @@ void TransposeUVWx8_NEON(const uint8_t* src,
: "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ asm volatile(
+ // Main loop transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "vld4.32 {d0[0], d2[0], d4[0], d6[0]}, [%0], %9 \n"
+ "vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1], %9 \n"
+ "vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%2], %9 \n"
+ "vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%3], %9 \n"
+ "subs %8, %8, #4 \n" // w -= 4
+ "vst1.8 {q0}, [%4]! \n"
+ "vst1.8 {q1}, [%5]! \n"
+ "vst1.8 {q2}, [%6]! \n"
+ "vst1.8 {q3}, [%7]! \n"
+ "bgt 1b \n"
+
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(dst1), // %5
+ "+r"(dst2), // %6
+ "+r"(dst3), // %7
+ "+r"(width) // %8
+ : "r"((ptrdiff_t)(src_stride * 4)) // %9
+ : "memory", "cc", "q0", "q1", "q2", "q3");
+}
+
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
diff --git a/files/source/rotate_neon64.cc b/files/source/rotate_neon64.cc
index 43c15817..95047fa7 100644
--- a/files/source/rotate_neon64.cc
+++ b/files/source/rotate_neon64.cc
@@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
"4: \n"
- : "=&r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst), // %2
- "+r"(width) // %3
- : "r"(&kVTbl4x4Transpose), // %4
- "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride)) // %6
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst), // %2
+ "+r"(width) // %3
+ : "r"(&kVTbl4x4Transpose), // %4
+ "r"((ptrdiff_t)src_stride), // %5
+ "r"((ptrdiff_t)dst_stride) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
@@ -423,18 +423,57 @@ void TransposeUVWx8_NEON(const uint8_t* src,
"4: \n"
- : "=&r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst_a), // %2
- "+r"(dst_b), // %3
- "+r"(width) // %4
- : "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
- "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
- "r"(&kVTbl4x4TransposeDi) // %8
+ : "=&r"(src_temp), // %0
+ "+r"(src), // %1
+ "+r"(dst_a), // %2
+ "+r"(dst_b), // %3
+ "+r"(width) // %4
+ : "r"((ptrdiff_t)src_stride), // %5
+ "r"((ptrdiff_t)dst_stride_a), // %6
+ "r"((ptrdiff_t)dst_stride_b), // %7
+ "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}
+
+// Transpose 32 bit values (ARGB)
+void Transpose4x4_32_NEON(const uint8_t* src,
+ int src_stride,
+ uint8_t* dst,
+ int dst_stride,
+ int width) {
+ const uint8_t* src1 = src + src_stride;
+ const uint8_t* src2 = src1 + src_stride;
+ const uint8_t* src3 = src2 + src_stride;
+ uint8_t* dst1 = dst + dst_stride;
+ uint8_t* dst2 = dst1 + dst_stride;
+ uint8_t* dst3 = dst2 + dst_stride;
+ asm volatile(
+ // Main loop transpose 4x4. Read a column, write a row.
+ "1: \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[0], [%0], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[1], [%1], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[2], [%2], %9 \n"
+ "ld4 {v0.s, v1.s, v2.s, v3.s}[3], [%3], %9 \n"
+ "subs %w8, %w8, #4 \n" // w -= 4
+ "st1 {v0.4s}, [%4], 16 \n"
+ "st1 {v1.4s}, [%5], 16 \n"
+ "st1 {v2.4s}, [%6], 16 \n"
+ "st1 {v3.4s}, [%7], 16 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(dst1), // %5
+ "+r"(dst2), // %6
+ "+r"(dst3), // %7
+ "+r"(width) // %8
+ : "r"((ptrdiff_t)(src_stride * 4)) // %9
+ : "memory", "cc", "v0", "v1", "v2", "v3");
+}
+
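// Design note on the NEON paths above: each ld4 (vld4.32 on arm) lane load
// spreads four consecutive 32 bit pixels of one source row across the same
// lane of four vector registers, so after loading four rows each register
// holds one source column, i.e. one transposed output row, and a plain
// st1/vst1 writes it out. No shuffle step is needed, unlike the unpack
// ladder in the AVX2 version.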
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
diff --git a/files/source/row_any.cc b/files/source/row_any.cc
index 3781a9f2..e574543c 100644
--- a/files/source/row_any.cc
+++ b/files/source/row_any.cc
@@ -19,7 +19,7 @@ namespace libyuv {
extern "C" {
#endif
-// memset for temp is meant to clear the source buffer (not dest) so that
+// memset for vin is meant to clear the source buffer so that
// SIMD code that reads a full multiple of 16 bytes will not trigger msan errors.
// memset is not needed for production, as the garbage values are processed but
// not used, although there may be edge cases for subsampling.
@@ -35,20 +35,20 @@ extern "C" {
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 5]); \
- memset(temp, 0, 64 * 4); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \
} \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 192, a_buf + n, r); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
- SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 192, a_buf + n, r); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
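// The remainder handling that every ANY* wrapper in this file follows,
// written out as a plain function for a hypothetical 1 plane to 1 plane byte
// row op with MASK 15 (a 16 pixel SIMD step). The name and signature are
// illustrative only, not library API; SIMD_ALIGNED is this file's macro.
static void AnyWidthSketch(const uint8_t* src, uint8_t* dst, int width,
                           void (*row_fn)(const uint8_t*, uint8_t*, int)) {
  SIMD_ALIGNED(uint8_t vin[64]);
  SIMD_ALIGNED(uint8_t vout[64]);
  memset(vin, 0, sizeof(vin)); /* for msan: row_fn may over-read */
  int r = width & 15;          /* remainder pixels */
  int n = width & ~15;         /* bulk, a multiple of 16 */
  if (n > 0) {
    row_fn(src, dst, n);       /* full-speed SIMD over the bulk */
  }
  memcpy(vin, src + n, r);     /* stage the remainder in padded scratch */
  row_fn(vin, vout, 16);       /* one full SIMD step; extra lanes are junk */
  memcpy(dst + n, vout, r);    /* copy back only the r valid pixels */
}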
#ifdef HAS_MERGEARGBROW_SSE2
@@ -68,25 +68,25 @@ ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 5]); \
- memset(temp, 0, 64 * 4); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 192, a_buf + n, r); \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 192, a_buf + n, r); \
if (width & 1) { \
- temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
- temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
+ vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1]; \
+ vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
} \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
- yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
- SS(r, DUVSHIFT) * BPP); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants, \
+ MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I444ALPHATOARGBROW_SSSE3
@@ -113,6 +113,9 @@ ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
+#ifdef HAS_I422ALPHATOARGBROW_LSX
+ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
+#endif
#ifdef HAS_I422ALPHATOARGBROW_LASX
ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
#endif
@@ -123,21 +126,20 @@ ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(T vin[16 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 48, a_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, yuvconstants, \
- MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 48, a_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210ALPHATOARGBROW_SSSE3
@@ -190,20 +192,20 @@ ANY41CT(I410AlphaToARGBRow_Any_AVX2,
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
- SIMD_ALIGNED(STYPE temp[16 * 4]); \
- SIMD_ALIGNED(DTYPE out[64]); \
- memset(temp, 0, 16 * 4 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(STYPE vin[16 * 4]); \
+ SIMD_ALIGNED(DTYPE vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \
} \
- memcpy(temp, r_buf + n, r * SBPP); \
- memcpy(temp + 16, g_buf + n, r * SBPP); \
- memcpy(temp + 32, b_buf + n, r * SBPP); \
- memcpy(temp + 48, a_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
- memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
+ memcpy(vin, r_buf + n, r * SBPP); \
+ memcpy(vin + 16, g_buf + n, r * SBPP); \
+ memcpy(vin + 32, b_buf + n, r * SBPP); \
+ memcpy(vin + 48, a_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1); \
+ memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_MERGEAR64ROW_AVX2
@@ -237,22 +239,22 @@ ANY41PT(MergeARGB16To8Row_Any_NEON,
#undef ANY41PT
// Any 3 planes to 1.
-#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
- const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 4]); \
- memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
+ } \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
// Merge functions.
@@ -285,6 +287,9 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOYUY2ROW_LSX
+ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOYUY2ROW_LASX
ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
#endif
@@ -294,6 +299,9 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOUYVYROW_LSX
+ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOUYVYROW_LASX
ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
#endif
@@ -308,28 +316,27 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
// Note that odd width replication includes 444 due to the arm
// implementation, which subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants
-#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
- const uint8_t* v_buf, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 4]); \
- memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- if (width & 1) { \
- temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
- temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \
- } \
- ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \
- MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \
- SS(r, DUVSHIFT) * BPP); \
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
+ const uint8_t* v_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, y_buf + n, r); \
+ memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ if (width & 1) { \
+ vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
+ vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1]; \
+ } \
+ ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I422TOARGBROW_SSSE3
@@ -359,6 +366,9 @@ ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
+#ifdef HAS_I444TORGB24ROW_SSSE3
+ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
+#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
@@ -374,6 +384,9 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
+#ifdef HAS_I444TORGB24ROW_AVX2
+ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31)
+#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
@@ -383,6 +396,9 @@ ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
+#ifdef HAS_I444TORGB24ROW_NEON
+ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7)
+#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
@@ -401,6 +417,14 @@ ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
+#ifdef HAS_I422TOARGBROW_LSX
+ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
+ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
+#endif
#ifdef HAS_I422TOARGBROW_LASX
ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
@@ -420,19 +444,19 @@ ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 3]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(T vin[16 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210TOAR30ROW_SSSE3
@@ -477,19 +501,19 @@ ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
DTYPE* dst_ptr, int depth, int width) { \
- SIMD_ALIGNED(STYPE temp[16 * 3]); \
- SIMD_ALIGNED(DTYPE out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
+ SIMD_ALIGNED(STYPE vin[16 * 3]); \
+ SIMD_ALIGNED(DTYPE vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \
} \
- memcpy(temp, r_buf + n, r * SBPP); \
- memcpy(temp + 16, g_buf + n, r * SBPP); \
- memcpy(temp + 32, b_buf + n, r * SBPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
- memcpy((uint8_t*)dst_ptr + n * BPP, out, r * BPP); \
+ memcpy(vin, r_buf + n, r * SBPP); \
+ memcpy(vin + 16, g_buf + n, r * SBPP); \
+ memcpy(vin + 32, b_buf + n, r * SBPP); \
+ ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1); \
+ memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_MERGEXR30ROW_AVX2
@@ -541,18 +565,19 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON,
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
} \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ memcpy(vin, y_buf + n * SBPP, r * SBPP); \
+ memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \
+ ANY_SIMD(vin, vin + 128, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
// Merge functions.
@@ -560,7 +585,10 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON,
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
-ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
+ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15)
+#endif
+#ifdef HAS_MERGEUVROW_AVX512BW
+ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
@@ -611,18 +639,27 @@ ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
+#ifdef HAS_ARGBMULTIPLYROW_LSX
+ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBMULTIPLYROW_LASX
ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
+#ifdef HAS_ARGBADDROW_LSX
+ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBADDROW_LASX
ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
+#ifdef HAS_ARGBSUBTRACTROW_LSX
+ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3)
+#endif
#ifdef HAS_ARGBSUBTRACTROW_LASX
ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7)
#endif
@@ -664,22 +701,53 @@ ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
#endif
#undef ANY21
+// Any 2 planes to 1 with stride
+// width is measured in source pixels; 4 bytes contain 2 pixels.
+#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t vin[32 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int awidth = (width + 1) / 2; \
+ int r = awidth & MASK; \
+ int n = awidth & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2); \
+ } \
+ memcpy(vin, src_yuy2 + n * SBPP, r * SBPP); \
+ memcpy(vin + 32, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP); \
+ ANY_SIMD(vin, 32, vout, MASK + 1); \
+ memcpy(dst_uv + n * BPP, vout, r * BPP); \
+ }
+
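// Worked example of the macropixel math above, for a YUY2ToNVUVRow variant
// with MASK 7: YUY2 packs 2 pixels into 4 bytes, so width 13 gives
// awidth = (13 + 1) / 2 = 7 macropixels, n = 0 and r = 7. Both source rows
// are staged into vin (r * SBPP = 28 bytes each), and a single padded SIMD
// step runs on the scratch rows, which sit 32 bytes apart, matching the
// stride passed in.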
+#ifdef HAS_YUY2TONVUVROW_NEON
+ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7)
+#endif
+#ifdef HAS_YUY2TONVUVROW_SSE2
+ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7)
+#endif
+#ifdef HAS_YUY2TONVUVROW_AVX2
+ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
+#endif
+
// Any 2 planes to 1 with yuvconstants
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
} \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ memcpy(vin, y_buf + n * SBPP, r * SBPP); \
+ memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \
+ ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
// Biplanar to RGB.
@@ -758,21 +826,21 @@ ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
#undef ANY21C
// Any 2 planes of 16 bit to 1 with yuvconstants
-#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
- void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(T temp[16 * 3]); \
- SIMD_ALIGNED(uint8_t out[64]); \
- memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, y_buf + n, r * SBPP); \
- memcpy(temp + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
- ANY_SIMD(temp, temp + 16, out, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
+#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
+ void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, y_buf + n, r * SBPP); \
+ memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
+ ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_P210TOAR30ROW_SSSE3
@@ -806,21 +874,22 @@ ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(T vout[16]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
} \
- memcpy(temp, src_u + n, r * BPP); \
- memcpy(temp + 16, src_v + n, r * BPP); \
- ANY_SIMD(temp, temp + 16, temp + 32, depth, MASK + 1); \
- memcpy(dst_uv + n * 2, temp + 32, r * BPP * 2); \
+ memcpy(vin, src_u + n, r * BPP); \
+ memcpy(vin + 16, src_v + n, r * BPP); \
+ ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1); \
+ memcpy(dst_uv + n * 2, vout, r * BPP * 2); \
}
#ifdef HAS_MERGEUVROW_16_AVX2
-ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 15)
+ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
@@ -829,18 +898,19 @@ ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#undef ANY21CT
// Any 1 to 1.
-#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 2]); \
- memset(temp, 0, 128); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 128, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_COPYROW_AVX
@@ -931,6 +1001,13 @@ ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
+#if defined(HAS_ARGBTORGB24ROW_LSX)
+ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15)
+ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15)
+ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7)
+ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7)
+ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7)
+#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31)
ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31)
@@ -959,6 +1036,9 @@ ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
+#ifdef HAS_ABGRTOYJROW_AVX2
+ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
@@ -983,6 +1063,9 @@ ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYJROW_SSSE3
+ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
+#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
@@ -992,12 +1075,18 @@ ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
+#ifdef HAS_ARGBTOYROW_LSX
+ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
+#endif
#ifdef HAS_ARGBTOYROW_LASX
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYJROW_NEON
+ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
+#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
#endif
@@ -1007,9 +1096,21 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_LSX
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_RGBATOYJROW_LSX
+ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
+#endif
+#ifdef HAS_ABGRTOYJROW_LSX
+ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
+#endif
+#ifdef HAS_RGBATOYJROW_LASX
+ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_ARGBTOYJROW_LASX
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
#endif
+#ifdef HAS_ABGRTOYJROW_LASX
+ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1019,6 +1120,9 @@ ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_BGRATOYROW_LSX
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_BGRATOYROW_LASX
+ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1028,6 +1132,9 @@ ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#ifdef HAS_ABGRTOYROW_LSX
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_ABGRTOYROW_LASX
+ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
#endif
@@ -1037,6 +1144,9 @@ ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#ifdef HAS_RGBATOYROW_LSX
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
#endif
+#ifdef HAS_RGBATOYROW_LASX
+ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
+#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
#endif
@@ -1055,6 +1165,12 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#ifdef HAS_RGB24TOYROW_LSX
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
#endif
+#ifdef HAS_RGB24TOYJROW_LSX
+ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RGB24TOYJROW_LASX
+ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
+#endif
#ifdef HAS_RGB24TOYROW_LASX
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
#endif
@@ -1079,6 +1195,12 @@ ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
#ifdef HAS_RAWTOYROW_LASX
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
#endif
+#ifdef HAS_RAWTOYJROW_LSX
+ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
+#endif
+#ifdef HAS_RAWTOYJROW_LASX
+ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
+#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
@@ -1115,12 +1237,18 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOYROW_LSX
+ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOYROW_LASX
ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_UYVYTOYROW_LSX
+ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_UYVYTOYROW_LASX
ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31)
#endif
@@ -1217,6 +1345,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
+#ifdef HAS_ARGBATTENUATEROW_LSX
+ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7)
+#endif
#ifdef HAS_ARGBATTENUATEROW_LASX
ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15)
#endif
@@ -1238,19 +1369,21 @@ ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
#undef ANY11
// Any 1 to 1 blended. Destination is read, modify, write.
-#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 64, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ memset(vout, 0, sizeof(vout)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ memcpy(vout, dst_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
@@ -1270,16 +1403,17 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
// Any 1 to 1 with parameter.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, param, n); \
} \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp, temp + 64, param, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP); \
+ ANY_SIMD(vin, vout, param, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
#if defined(HAS_I400TOARGBROW_SSE2)
@@ -1355,6 +1489,14 @@ ANY11P(ARGBToRGB565DitherRow_Any_MSA,
2,
7)
#endif
+#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
+ANY11P(ARGBToRGB565DitherRow_Any_LSX,
+ ARGBToRGB565DitherRow_LSX,
+ const uint32_t,
+ 4,
+ 2,
+ 7)
+#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
ANY11P(ARGBToRGB565DitherRow_Any_LASX,
ARGBToRGB565DitherRow_LASX,
@@ -1375,6 +1517,9 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
+#ifdef HAS_ARGBSHUFFLEROW_LSX
+ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7)
+#endif
#ifdef HAS_ARGBSHUFFLEROW_LASX
ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
#endif
@@ -1384,17 +1529,17 @@ ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
// Any 1 to 1 with type
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
- SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
- memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]); \
+ SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
- memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
- ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
- memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
+ memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
+ ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1); \
+ memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP); \
}
#ifdef HAS_ARGBTOAR64ROW_SSSE3
@@ -1450,17 +1595,17 @@ ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
// Any 1 to 1 with parameter and shorts. BPP is measured in shorts.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
- SIMD_ALIGNED(STYPE temp[32]); \
- SIMD_ALIGNED(DTYPE out[32]); \
- memset(temp, 0, 32 * SBPP); /* for msan */ \
+ SIMD_ALIGNED(STYPE vin[32]); \
+ SIMD_ALIGNED(DTYPE vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, scale, n); \
} \
- memcpy(temp, src_ptr + n, r * SBPP); \
- ANY_SIMD(temp, out, scale, MASK + 1); \
- memcpy(dst_ptr + n, out, r * BPP); \
+ memcpy(vin, src_ptr + n, r * SBPP); \
+ ANY_SIMD(vin, vout, scale, MASK + 1); \
+ memcpy(dst_ptr + n, vout, r * BPP); \
}
#ifdef HAS_CONVERT16TO8ROW_SSSE3
@@ -1537,17 +1682,17 @@ ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
// Any 1 to 1 with parameter and shorts to byte. BPP is measured in shorts.
#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \
void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \
- SIMD_ALIGNED(ST temp[32]); \
- SIMD_ALIGNED(T out[32]); \
- memset(temp, 0, SBPP * 32); /* for msan */ \
+ SIMD_ALIGNED(ST vin[32]); \
+ SIMD_ALIGNED(T vout[32]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, param, n); \
} \
- memcpy(temp, src_ptr + n, r * SBPP); \
- ANY_SIMD(temp, out, param, MASK + 1); \
- memcpy(dst_ptr + n, out, r * BPP); \
+ memcpy(vin, src_ptr + n, r * SBPP); \
+ ANY_SIMD(vin, vout, param, MASK + 1); \
+ memcpy(dst_ptr + n, vout, r * BPP); \
}
#ifdef HAS_HALFFLOATROW_SSE2
@@ -1588,20 +1733,22 @@ ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31)
#undef ANY11P16
// Any 1 to 1 with yuvconstants
-#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
- const struct YuvConstants* yuvconstants, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 2]); \
- memset(temp, 0, 128); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
+ const struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(vin, vout, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
+
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
@@ -1628,21 +1775,21 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
int width, int source_y_fraction) { \
- SIMD_ALIGNED(TS temps[64 * 2]); \
- SIMD_ALIGNED(TD tempd[64]); \
- memset(temps, 0, sizeof(temps)); /* for msan */ \
+ SIMD_ALIGNED(TS vin[64 * 2]); \
+ SIMD_ALIGNED(TD vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
} \
- memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
if (source_y_fraction) { \
- memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \
+ memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
r * SBPP * sizeof(TS)); \
} \
- ANY_SIMD(tempd, temps, 64, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \
+ ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
}
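// Note on the interpolate wrapper above: the second source row is staged at
// vin + 64 only when source_y_fraction is nonzero, since a fraction of 0
// reads a single row; the remainder call then passes 64 as the row stride so
// the two staged rows are found 64 elements apart.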
#ifdef HAS_INTERPOLATEROW_AVX2
@@ -1682,21 +1829,21 @@ ANY11I(InterpolateRow_16_Any_NEON,
#define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \
void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
int scale, int width, int source_y_fraction) { \
- SIMD_ALIGNED(TS temps[64 * 2]); \
- SIMD_ALIGNED(TD tempd[64]); \
- memset(temps, 0, sizeof(temps)); /* for msan */ \
+ SIMD_ALIGNED(TS vin[64 * 2]); \
+ SIMD_ALIGNED(TD vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \
} \
- memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+ memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
if (source_y_fraction) { \
- memcpy(temps + 64, src_ptr + src_stride + n * SBPP, \
+ memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
r * SBPP * sizeof(TS)); \
} \
- ANY_SIMD(tempd, temps, 64, scale, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD)); \
+ ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
}
#ifdef HAS_INTERPOLATEROW_16TO8_NEON
@@ -1721,18 +1868,19 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
#undef ANY11IS
// Any 1 to 1 mirror.
-#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8_t temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr, r* BPP); \
- ANY_SIMD(temp, temp + 64, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
+#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
+ SIMD_ALIGNED(uint8_t vin[64]); \
+ SIMD_ALIGNED(uint8_t vout[64]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
+ } \
+    memcpy(vin, src_ptr, r * BPP);                                        \
+ ANY_SIMD(vin, vout, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
}
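// Worked example of the mirror remainder above: with MASK 31 and width 37,
// r = 5 and n = 32. The bulk call mirrors source pixels r..r+31 straight
// into the start of dst_ptr; the leading r pixels are then staged in vin,
// mirrored as a full 32 pixel block into vout, where they land at the end,
// so the copy out reads from vout + (MASK + 1 - r) * BPP = vout + 27 * BPP.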
#ifdef HAS_MIRRORROW_AVX2
@@ -1747,6 +1895,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
+#ifdef HAS_MIRRORROW_LSX
+ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31)
+#endif
#ifdef HAS_MIRRORROW_LASX
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#endif
@@ -1762,6 +1913,9 @@ ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#ifdef HAS_MIRRORUVROW_MSA
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
+#ifdef HAS_MIRRORUVROW_LSX
+ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7)
+#endif
#ifdef HAS_MIRRORUVROW_LASX
ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15)
#endif
@@ -1777,6 +1931,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
+#ifdef HAS_ARGBMIRRORROW_LSX
+ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
+#endif
#ifdef HAS_ARGBMIRRORROW_LASX
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif
@@ -1791,15 +1948,14 @@ ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
// Any 1 plane. (memset)
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
- SIMD_ALIGNED(uint8_t temp[64]); \
- memset(temp, 0, 64); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vout[64]); \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(dst_ptr, v32, n); \
} \
- ANY_SIMD(temp, v32, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp, r * BPP); \
+ ANY_SIMD(vout, v32, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, vout, r * BPP); \
}
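// Note on ANY1 above: it needs no scratch memset, since there is no staged
// input to over-read; ANY_SIMD only writes vout, and the copy out takes just
// the r valid bytes.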
#ifdef HAS_SETROW_X86
@@ -1823,20 +1979,21 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
#undef ANY1
// Any 1 to 2. Outputs UV planes.
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
- int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_u, dst_v, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
- memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
+#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t vin[128]); \
+ SIMD_ALIGNED(uint8_t vout[128 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
+ } \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ ANY_SIMD(vin, vout, vout + 128, MASK + 1); \
+ memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \
+ memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \
}
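The SS() helper used above rounds a subsampled count up so an odd remainder still produces its final chroma sample; near the top of row_any.cc it is defined as:

  /* Subsampled size, rounded up: */
  #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
  /* e.g. SS(5, 1) == 3: five luma pixels need three 4:2:2 chroma samples. */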
#ifdef HAS_SPLITUVROW_SSE2
@@ -1875,6 +2032,11 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOUV422ROW_LSX
+ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15)
+ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15)
+ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOUV422ROW_LASX
ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31)
ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31)
@@ -1885,17 +2047,18 @@ ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31)
// Any 2 16 bit planes with parameter to 1
#define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \
- SIMD_ALIGNED(T temp[16 * 4]); \
- memset(temp, 0, 16 * 4 * BPP); /* for msan */ \
+ SIMD_ALIGNED(T vin[16 * 2]); \
+ SIMD_ALIGNED(T vout[16 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
} \
- memcpy(temp, src_uv + n * 2, r * BPP * 2); \
- ANY_SIMD(temp, temp + 32, temp + 48, depth, MASK + 1); \
- memcpy(dst_u + n, temp + 32, r * BPP); \
- memcpy(dst_v + n, temp + 48, r * BPP); \
+ memcpy(vin, src_uv + n * 2, r * BPP * 2); \
+ ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1); \
+ memcpy(dst_u + n, vout, r * BPP); \
+ memcpy(dst_v + n, vout + 16, r * BPP); \
}
#ifdef HAS_SPLITUVROW_16_AVX2
@@ -1909,21 +2072,22 @@ ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
#undef ANY21CT
// Any 1 to 3. Outputs RGB planes.
-#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
- uint8_t* dst_b, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 6]); \
- memset(temp, 0, 16 * 3); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
- } \
- memcpy(temp, src_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
- memcpy(dst_r + n, temp + 16 * 3, r); \
- memcpy(dst_g + n, temp + 16 * 4, r); \
- memcpy(dst_b + n, temp + 16 * 5, r); \
+#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 3]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 3]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
+ } \
+ memcpy(vin, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1); \
+ memcpy(dst_r + n, vout, r); \
+ memcpy(dst_g + n, vout + 16, r); \
+ memcpy(dst_b + n, vout + 32, r); \
}
#ifdef HAS_SPLITRGBROW_SSSE3
@@ -1946,23 +2110,23 @@ ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
#endif
// Any 1 to 4. Outputs ARGB planes.
-#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
- uint8_t* dst_b, uint8_t* dst_a, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 8]); \
- memset(temp, 0, 16 * 4); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
- } \
- memcpy(temp, src_ptr + n * BPP, r * BPP); \
- ANY_SIMD(temp, temp + 16 * 4, temp + 16 * 5, temp + 16 * 6, temp + 16 * 7, \
- MASK + 1); \
- memcpy(dst_r + n, temp + 16 * 4, r); \
- memcpy(dst_g + n, temp + 16 * 5, r); \
- memcpy(dst_b + n, temp + 16 * 6, r); \
- memcpy(dst_a + n, temp + 16 * 7, r); \
+#define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
+ uint8_t* dst_b, uint8_t* dst_a, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 4]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 4]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
+ } \
+ memcpy(vin, src_ptr + n * BPP, r * BPP); \
+ ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1); \
+ memcpy(dst_r + n, vout, r); \
+ memcpy(dst_g + n, vout + 16, r); \
+ memcpy(dst_b + n, vout + 32, r); \
+ memcpy(dst_a + n, vout + 48, r); \
}
#ifdef HAS_SPLITARGBROW_SSE2
@@ -1983,25 +2147,26 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 4]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
} \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
- memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
- temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
+ vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
- ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
- memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
- memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
+ ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \
+ memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \
+ memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \
}
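The (width & 1) && UVSHIFT == 0 branch exists because these kernels average 2x2 pixel blocks down to one U/V pair: on an odd-width row the right column of the last block would otherwise read zeroed scratch and skew the average, so the final pixel of both staged rows is duplicated first. For ARGB (BPP = 4) the two memcpy calls reduce to the following (an illustration of the macro body, not new behavior):

  /* r odd: duplicate the last ARGB pixel on both rows before the kernel. */
  memcpy(vin + r * 4, vin + (r - 1) * 4, 4);             /* row 0 */
  memcpy(vin + 128 + r * 4, vin + 128 + (r - 1) * 4, 4); /* row 1 */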
#ifdef HAS_ARGBTOUVROW_AVX2
@@ -2013,9 +2178,17 @@ ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
+#ifdef HAS_ABGRTOUVJROW_AVX2
+ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31)
+#endif
+#ifdef HAS_ARGBTOUVJROW_SSSE3
+ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
+#endif
+#ifdef HAS_ABGRTOUVJROW_SSSE3
+ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
-ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
@@ -2034,12 +2207,18 @@ ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#ifdef HAS_ARGBTOUVROW_MSA
ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31)
#endif
+#ifdef HAS_ARGBTOUVROW_LSX
+ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVROW_LASX
ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
+#ifdef HAS_ABGRTOUVJROW_NEON
+ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15)
+#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
@@ -2142,12 +2321,18 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
+#ifdef HAS_YUY2TOUVROW_LSX
+ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15)
+#endif
#ifdef HAS_YUY2TOUVROW_LASX
ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
+#ifdef HAS_UYVYTOUVROW_LSX
+ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15)
+#endif
#ifdef HAS_UYVYTOUVROW_LASX
ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
#endif
@@ -2158,24 +2343,25 @@ ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \
int width) { \
- SIMD_ALIGNED(uint8_t temp[128 * 3]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[128 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[128]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
} \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- memcpy(temp + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+ memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
- memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
- temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \
+ vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
- ANY_SIMD(temp, 128, temp + 256, MASK + 1); \
- memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \
+ ANY_SIMD(vin, 128, vout, MASK + 1); \
+ memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2); \
}
#ifdef HAS_AYUVTOVUROW_NEON
@@ -2184,42 +2370,53 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
-#define ANYDETILE(NAMEANY, ANY_SIMD, MASK) \
- void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \
- int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 2]); \
- memset(temp, 0, 16); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src, src_tile_stride, dst, n); \
- } \
- memcpy(temp, src + (n / 16) * src_tile_stride, r); \
- ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1); \
- memcpy(dst + n, temp + 16, r); \
+#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \
+ void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
+ SIMD_ALIGNED(T vin[16]); \
+ SIMD_ALIGNED(T vout[16]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src, src_tile_stride, dst, n); \
+ } \
+ memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP); \
+ ANY_SIMD(vin, src_tile_stride, vout, MASK + 1); \
+ memcpy(dst + n, vout, r * BPP); \
}
#ifdef HAS_DETILEROW_NEON
-ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15)
+ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
#endif
#ifdef HAS_DETILEROW_SSE2
-ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15)
+ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
+#endif
+#ifdef HAS_DETILEROW_16_NEON
+ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_SSE2
+ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_AVX
+ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
#endif
+// DetileSplitUVRow width is in bytes
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \
void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \
- SIMD_ALIGNED(uint8_t temp[16 * 2]); \
- memset(temp, 0, 16 * 2); /* for msan */ \
+ SIMD_ALIGNED(uint8_t vin[16]); \
+ SIMD_ALIGNED(uint8_t vout[8 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n); \
} \
- memcpy(temp, src_uv + (n / 16) * src_tile_stride, r); \
- ANY_SIMD(temp, src_tile_stride, temp + 16, temp + 24, r); \
- memcpy(dst_u + n / 2, temp + 16, (r + 1) / 2); \
- memcpy(dst_v + n / 2, temp + 24, (r + 1) / 2); \
+ memcpy(vin, src_uv + (n / 16) * src_tile_stride, r); \
+ ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r); \
+ memcpy(dst_u + n / 2, vout, (r + 1) / 2); \
+ memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2); \
}
#ifdef HAS_DETILESPLITUVROW_NEON
@@ -2229,6 +2426,33 @@ ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
#endif
+#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK) \
+ void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, \
+ const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, \
+ uint8_t* dst_yuy2, int width) { \
+ SIMD_ALIGNED(uint8_t vin[16 * 2]); \
+ SIMD_ALIGNED(uint8_t vout[16 * 2]); \
+ memset(vin, 0, sizeof(vin)); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \
+ n); \
+ } \
+ memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r); \
+ memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \
+ ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r); \
+ memcpy(dst_yuy2 + 2 * n, vout, 2 * r); \
+ }
+
+#ifdef HAS_DETILETOYUY2_NEON
+ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15)
+#endif
+
+#ifdef HAS_DETILETOYUY2_SSE2
+ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15)
+#endif
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_common.cc b/files/source/row_common.cc
index 83442496..8be37fb5 100644
--- a/files/source/row_common.cc
+++ b/files/source/row_common.cc
@@ -21,6 +21,12 @@ namespace libyuv {
extern "C" {
#endif
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
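The guard lets the same macro work whether the translation unit is built as C++ (static_cast, which refuses invalid conversions) or as C (a plain cast). Threading STATIC_CAST through the row functions below makes each intentional narrowing from int to uint8_t/uint16_t explicit, silencing implicit-conversion warnings without changing any computed value. A minimal illustration:

  int y = 0x123;
  uint8_t lo = STATIC_CAST(uint8_t, y & 0xff);
  /* C++: static_cast<uint8_t>(y & 0xff)   C: (uint8_t)(y & 0xff) */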
// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA
@@ -182,12 +188,13 @@ void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_rgb565[0] & 0x1f;
- uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r = src_rgb565[1] >> 3;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 2) | (g >> 4);
- dst_argb[2] = (r << 3) | (r >> 2);
+ uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_argb[3] = 255u;
dst_argb += 4;
src_rgb565 += 2;
@@ -199,13 +206,14 @@ void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_argb1555[0] & 0x1f;
- uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t a = src_argb1555[1] >> 7;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 3) | (g >> 2);
- dst_argb[2] = (r << 3) | (r >> 2);
+ uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_argb[3] = -a;
dst_argb += 4;
src_argb1555 += 2;
@@ -217,14 +225,14 @@ void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
int width) {
int x;
for (x = 0; x < width; ++x) {
- uint8_t b = src_argb4444[0] & 0x0f;
- uint8_t g = src_argb4444[0] >> 4;
- uint8_t r = src_argb4444[1] & 0x0f;
- uint8_t a = src_argb4444[1] >> 4;
- dst_argb[0] = (b << 4) | b;
- dst_argb[1] = (g << 4) | g;
- dst_argb[2] = (r << 4) | r;
- dst_argb[3] = (a << 4) | a;
+ uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
+ uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
+ uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
+ uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
+ dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
+ dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
+ dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
+ dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
dst_argb += 4;
src_argb4444 += 2;
}
@@ -320,7 +328,7 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t b0 = src_argb[0] >> 3;
uint8_t g0 = src_argb[1] >> 2;
uint8_t r0 = src_argb[2] >> 3;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -334,29 +342,31 @@ void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
int dither0 = ((const unsigned char*)(&dither4))[x & 3];
int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
- uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
- uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
- uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
- uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11);
+ uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+ uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+ uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+ uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
+ uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
+ uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
dst_rgb += 4;
src_argb += 8;
}
if (width & 1) {
int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
- uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+ uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
+ uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
+ uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
+ *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -371,8 +381,10 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 3;
uint8_t r1 = src_argb[6] >> 3;
uint8_t a1 = src_argb[7] >> 7;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 10) | (a1 << 15);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
dst_rgb += 4;
src_argb += 8;
}
@@ -381,7 +393,8 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g0 = src_argb[1] >> 3;
uint8_t r0 = src_argb[2] >> 3;
uint8_t a0 = src_argb[3] >> 7;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+ *(uint16_t*)(dst_rgb) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
}
}
@@ -396,8 +409,10 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 4;
uint8_t r1 = src_argb[6] >> 4;
uint8_t a1 = src_argb[7] >> 4;
- *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
- *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 4) | (r1 << 8) | (a1 << 12);
+ *(uint16_t*)(dst_rgb + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
+ *(uint16_t*)(dst_rgb + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
dst_rgb += 4;
src_argb += 8;
}
@@ -406,18 +421,20 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g0 = src_argb[1] >> 4;
uint8_t r0 = src_argb[2] >> 4;
uint8_t a0 = src_argb[3] >> 4;
- *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+ *(uint16_t*)(dst_rgb) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
}
}
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
int x;
for (x = 0; x < width; ++x) {
- uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
+ uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
- uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
+ uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
uint32_t a0 = (src_abgr[3] >> 6);
- *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
+ *(uint32_t*)(dst_ar30) =
+ STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
dst_ar30 += 4;
src_abgr += 4;
}
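In libyuv naming, ABGR means bytes R, G, B, A in memory, so byte 0 is red and byte 2 is blue; the old variable names had them swapped, although the packed result was already valid AR30 (B in bits 0-9, G in 10-19, R in 20-29, A in 30-31). The rename makes the expression read the way the format is laid out. The (v >> 6) | ((uint32_t)v << 2) step widens 8 bits to 10 by replicating the top two bits into the bottom, so full scale stays full scale:

  /* 8 -> 10 bit widening by bit replication (sketch): */
  static inline uint32_t Expand8To10(uint8_t v) {
    return ((uint32_t)v << 2) | (v >> 6); /* 0x00 -> 0x000, 0xFF -> 0x3FF */
  }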
@@ -430,7 +447,8 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
uint32_t a0 = (src_argb[3] >> 6);
- *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
+ *(uint32_t*)(dst_ar30) =
+ STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
dst_ar30 += 4;
src_argb += 4;
}
@@ -439,10 +457,14 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = src_argb[0] * 0x0101;
- dst_ar64[1] = src_argb[1] * 0x0101;
- dst_ar64[2] = src_argb[2] * 0x0101;
- dst_ar64[3] = src_argb[3] * 0x0101;
+ uint16_t b = src_argb[0] * 0x0101;
+ uint16_t g = src_argb[1] * 0x0101;
+ uint16_t r = src_argb[2] * 0x0101;
+ uint16_t a = src_argb[3] * 0x0101;
+ dst_ar64[0] = b;
+ dst_ar64[1] = g;
+ dst_ar64[2] = r;
+ dst_ar64[3] = a;
dst_ar64 += 4;
src_argb += 4;
}
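Multiplying an 8-bit channel by 0x0101 replicates the byte into both halves of the 16-bit result, the usual way to widen color depth so that 0xFF maps to 0xFFFF rather than 0xFF00:

  /* v * 0x0101 == (v << 8) | v for any v in 0..255. */
  static inline uint16_t Widen8To16(uint8_t v) {
    return (uint16_t)(v * 0x0101); /* Widen8To16(0x12) == 0x1212 */
  }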
@@ -451,10 +473,14 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_ab64[0] = src_argb[2] * 0x0101;
- dst_ab64[1] = src_argb[1] * 0x0101;
- dst_ab64[2] = src_argb[0] * 0x0101;
- dst_ab64[3] = src_argb[3] * 0x0101;
+ uint16_t b = src_argb[0] * 0x0101;
+ uint16_t g = src_argb[1] * 0x0101;
+ uint16_t r = src_argb[2] * 0x0101;
+ uint16_t a = src_argb[3] * 0x0101;
+ dst_ab64[0] = r;
+ dst_ab64[1] = g;
+ dst_ab64[2] = b;
+ dst_ab64[3] = a;
dst_ab64 += 4;
src_argb += 4;
}
@@ -463,10 +489,14 @@ void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_argb[0] = src_ar64[0] >> 8;
- dst_argb[1] = src_ar64[1] >> 8;
- dst_argb[2] = src_ar64[2] >> 8;
- dst_argb[3] = src_ar64[3] >> 8;
+ uint8_t b = src_ar64[0] >> 8;
+ uint8_t g = src_ar64[1] >> 8;
+ uint8_t r = src_ar64[2] >> 8;
+ uint8_t a = src_ar64[3] >> 8;
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
dst_argb += 4;
src_ar64 += 4;
}
@@ -475,10 +505,14 @@ void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_argb[0] = src_ab64[2] >> 8;
- dst_argb[1] = src_ab64[1] >> 8;
- dst_argb[2] = src_ab64[0] >> 8;
- dst_argb[3] = src_ab64[3] >> 8;
+ uint8_t r = src_ab64[0] >> 8;
+ uint8_t g = src_ab64[1] >> 8;
+ uint8_t b = src_ab64[2] >> 8;
+ uint8_t a = src_ab64[3] >> 8;
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
dst_argb += 4;
src_ab64 += 4;
}
@@ -514,8 +548,8 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64,
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
- return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
}
#else
// 8 bit
@@ -524,8 +558,8 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
// 0x7e80) >> 8;
-static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
- return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
+static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
#endif
@@ -533,29 +567,31 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
-static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
-static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
- return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
+ return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
#endif
// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
- return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
+ return STATIC_CAST(
+ uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
- return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
+ return STATIC_CAST(
+ uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
}
#endif
@@ -674,28 +710,28 @@ MAKEROWY(RAW, 0, 1, 2, 3)
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
-static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
#if defined(LIBYUV_ARGBTOUV_PAVGB)
-static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
-static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
+static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
-static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
-static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
+static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
@@ -782,6 +818,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
+MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
@@ -791,11 +828,12 @@ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f;
- uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
uint8_t r = src_rgb565[1] >> 3;
- b = (b << 3) | (b >> 2);
- g = (g << 2) | (g >> 4);
- r = (r << 3) | (r >> 2);
+ b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
+ r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_y[0] = RGBToY(r, g, b);
src_rgb565 += 2;
dst_y += 1;
@@ -806,11 +844,12 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t b = src_argb1555[0] & 0x1f;
- uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+ uint8_t g = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
- b = (b << 3) | (b >> 2);
- g = (g << 3) | (g >> 2);
- r = (r << 3) | (r >> 2);
+ b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
+ g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
+ r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
dst_y[0] = RGBToY(r, g, b);
src_argb1555 += 2;
dst_y += 1;
@@ -823,9 +862,9 @@ void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
uint8_t b = src_argb4444[0] & 0x0f;
uint8_t g = src_argb4444[0] >> 4;
uint8_t r = src_argb4444[1] & 0x0f;
- b = (b << 4) | b;
- g = (g << 4) | g;
- r = (r << 4) | r;
+ b = STATIC_CAST(uint8_t, (b << 4) | b);
+ g = STATIC_CAST(uint8_t, (g << 4) | g);
+ r = STATIC_CAST(uint8_t, (r << 4) | r);
dst_y[0] = RGBToY(r, g, b);
src_argb4444 += 2;
dst_y += 1;
@@ -840,31 +879,35 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8_t b0 = src_rgb565[0] & 0x1f;
- uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r0 = src_rgb565[1] >> 3;
- uint8_t b1 = src_rgb565[2] & 0x1f;
- uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
- uint8_t r1 = src_rgb565[3] >> 3;
- uint8_t b2 = next_rgb565[0] & 0x1f;
- uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8_t r2 = next_rgb565[1] >> 3;
- uint8_t b3 = next_rgb565[2] & 0x1f;
- uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
- uint8_t r3 = next_rgb565[3] >> 3;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 2) | (g0 >> 4);
- r0 = (r0 << 3) | (r0 >> 2);
- b1 = (b1 << 3) | (b1 >> 2);
- g1 = (g1 << 2) | (g1 >> 4);
- r1 = (r1 << 3) | (r1 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 2) | (g2 >> 4);
- r2 = (r2 << 3) | (r2 >> 2);
- b3 = (b3 << 3) | (b3 >> 2);
- g3 = (g3 << 2) | (g3 >> 4);
- r3 = (r3 << 3) | (r3 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
+ uint8_t g1 = STATIC_CAST(
+ uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
+ uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+ uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
+ uint8_t g3 = STATIC_CAST(
+ uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
+ uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+ g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
+ r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+ b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+ g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
+ r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -886,19 +929,20 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
dst_v += 1;
}
if (width & 1) {
- uint8_t b0 = src_rgb565[0] & 0x1f;
- uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8_t r0 = src_rgb565[1] >> 3;
- uint8_t b2 = next_rgb565[0] & 0x1f;
- uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8_t r2 = next_rgb565[1] >> 3;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 2) | (g0 >> 4);
- r0 = (r0 << 3) | (r0 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 2) | (g2 >> 4);
- r2 = (r2 << 3) | (r2 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -924,31 +968,35 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
int x;
for (x = 0; x < width - 1; x += 2) {
- uint8_t b0 = src_argb1555[0] & 0x1f;
- uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t b1 = src_argb1555[2] & 0x1f;
- uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
- uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
- uint8_t b2 = next_argb1555[0] & 0x1f;
- uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
- uint8_t b3 = next_argb1555[2] & 0x1f;
- uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
- uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 3) | (g0 >> 2);
- r0 = (r0 << 3) | (r0 >> 2);
- b1 = (b1 << 3) | (b1 >> 2);
- g1 = (g1 << 3) | (g1 >> 2);
- r1 = (r1 << 3) | (r1 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 3) | (g2 >> 2);
- r2 = (r2 << 3) | (r2 >> 2);
- b3 = (b3 << 3) | (b3 >> 2);
- g3 = (g3 << 3) | (g3 >> 2);
- r3 = (r3 << 3) | (r3 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
+ uint8_t g1 = STATIC_CAST(
+ uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
+ uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+ uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
+ uint8_t g3 = STATIC_CAST(
+ uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
+ uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
+ g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
+ r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
+ b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
+ g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
+ r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -970,19 +1018,21 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
dst_v += 1;
}
if (width & 1) {
- uint8_t b0 = src_argb1555[0] & 0x1f;
- uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
- uint8_t b2 = next_argb1555[0] & 0x1f;
- uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
- uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
-
- b0 = (b0 << 3) | (b0 >> 2);
- g0 = (g0 << 3) | (g0 >> 2);
- r0 = (r0 << 3) | (r0 >> 2);
- b2 = (b2 << 3) | (b2 >> 2);
- g2 = (g2 << 3) | (g2 >> 2);
- r2 = (r2 << 3) | (r2 >> 2);
+ uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
+ uint8_t g0 = STATIC_CAST(
+ uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
+ uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
+ uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
+ uint8_t g2 = STATIC_CAST(
+ uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
+ uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
+
+ b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
+ g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
+ r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
+ b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
+ g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
+ r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -1021,18 +1071,18 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t g3 = next_argb4444[2] >> 4;
uint8_t r3 = next_argb4444[3] & 0x0f;
- b0 = (b0 << 4) | b0;
- g0 = (g0 << 4) | g0;
- r0 = (r0 << 4) | r0;
- b1 = (b1 << 4) | b1;
- g1 = (g1 << 4) | g1;
- r1 = (r1 << 4) | r1;
- b2 = (b2 << 4) | b2;
- g2 = (g2 << 4) | g2;
- r2 = (r2 << 4) | r2;
- b3 = (b3 << 4) | b3;
- g3 = (g3 << 4) | g3;
- r3 = (r3 << 4) | r3;
+ b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+ g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+ r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+ b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
+ g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
+ r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
+ b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+ g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+ r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
+ b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
+ g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
+ r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
@@ -1061,12 +1111,12 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f;
- b0 = (b0 << 4) | b0;
- g0 = (g0 << 4) | g0;
- r0 = (r0 << 4) | r0;
- b2 = (b2 << 4) | b2;
- g2 = (g2 << 4) | g2;
- r2 = (r2 << 4) | r2;
+ b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
+ g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
+ r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
+ b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
+ g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
+ r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
@@ -1123,9 +1173,9 @@ void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
int sg = (b * 22 + g * 88 + r * 45) >> 7;
int sr = (b * 24 + g * 98 + r * 50) >> 7;
// b does not overflow. a is preserved from original.
- dst_argb[0] = sb;
- dst_argb[1] = clamp255(sg);
- dst_argb[2] = clamp255(sr);
+ dst_argb[0] = STATIC_CAST(uint8_t, sb);
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
dst_argb += 4;
}
}
@@ -1154,10 +1204,10 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb,
int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
a * matrix_argb[15]) >>
6;
- dst_argb[0] = Clamp(sb);
- dst_argb[1] = Clamp(sg);
- dst_argb[2] = Clamp(sr);
- dst_argb[3] = Clamp(sa);
+ dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
+ dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
+ dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
+ dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
src_argb += 4;
dst_argb += 4;
}
@@ -1207,9 +1257,12 @@ void ARGBQuantizeRow_C(uint8_t* dst_argb,
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
- dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
- dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
- dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
+ dst_argb[0] = STATIC_CAST(
+ uint8_t, (b * scale >> 16) * interval_size + interval_offset);
+ dst_argb[1] = STATIC_CAST(
+ uint8_t, (g * scale >> 16) * interval_size + interval_offset);
+ dst_argb[2] = STATIC_CAST(
+ uint8_t, (r * scale >> 16) * interval_size + interval_offset);
dst_argb += 4;
}
}
@@ -1260,10 +1313,10 @@ void ARGBMultiplyRow_C(const uint8_t* src_argb,
const uint32_t g_scale = src_argb1[1];
const uint32_t r_scale = src_argb1[2];
const uint32_t a_scale = src_argb1[3];
- dst_argb[0] = SHADE(b, b_scale);
- dst_argb[1] = SHADE(g, g_scale);
- dst_argb[2] = SHADE(r, r_scale);
- dst_argb[3] = SHADE(a, a_scale);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1288,10 +1341,10 @@ void ARGBAddRow_C(const uint8_t* src_argb,
const int g_add = src_argb1[1];
const int r_add = src_argb1[2];
const int a_add = src_argb1[3];
- dst_argb[0] = SHADE(b, b_add);
- dst_argb[1] = SHADE(g, g_add);
- dst_argb[2] = SHADE(r, r_add);
- dst_argb[3] = SHADE(a, a_add);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1315,10 +1368,10 @@ void ARGBSubtractRow_C(const uint8_t* src_argb,
const int g_sub = src_argb1[1];
const int r_sub = src_argb1[2];
const int a_sub = src_argb1[3];
- dst_argb[0] = SHADE(b, b_sub);
- dst_argb[1] = SHADE(g, g_sub);
- dst_argb[2] = SHADE(r, r_sub);
- dst_argb[3] = SHADE(a, a_sub);
+ dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
+ dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
+ dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
+ dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
src_argb += 4;
src_argb1 += 4;
dst_argb += 4;
@@ -1431,7 +1484,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
// clang-format off
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// Bias values include subtracting 128 from U and V, the bias from Y, and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
@@ -1627,7 +1680,7 @@ MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
#undef MAKEYUVCONSTANTS
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
#define LOAD_YUV_CONSTANTS \
int ub = yuvconstants->kUVCoeff[0]; \
int vr = yuvconstants->kUVCoeff[1]; \
@@ -1675,9 +1728,9 @@ static __inline void YuvPixel(uint8_t y,
LOAD_YUV_CONSTANTS;
uint32_t y32 = y * 0x0101;
CALC_RGB16;
- *b = Clamp((int32_t)(b16) >> 6);
- *g = Clamp((int32_t)(g16) >> 6);
- *r = Clamp((int32_t)(r16) >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// Reads 8 bit YUV and leaves result as 16 bit.
@@ -1706,9 +1759,9 @@ static __inline void YuvPixel10_16(uint16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
- uint32_t y32 = y << 6;
- u = clamp255(u >> 2);
- v = clamp255(v >> 2);
+ uint32_t y32 = (y << 6) | (y >> 4);
+ u = STATIC_CAST(uint8_t, clamp255(u >> 2));
+ v = STATIC_CAST(uint8_t, clamp255(v >> 2));
CALC_RGB16;
*b = b16;
*g = g16;
@@ -1725,9 +1778,9 @@ static __inline void YuvPixel12_16(int16_t y,
int* r,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
- uint32_t y32 = y << 4;
- u = clamp255(u >> 4);
- v = clamp255(v >> 4);
+ uint32_t y32 = (y << 4) | (y >> 8);
+ u = STATIC_CAST(uint8_t, clamp255(u >> 4));
+ v = STATIC_CAST(uint8_t, clamp255(v >> 4));
CALC_RGB16;
*b = b16;
*g = g16;
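Replacing y << 6 with (y << 6) | (y >> 4) here, and y << 4 with (y << 4) | (y >> 8) in the 12-bit path below, applies the same bit-replication widening to luma: a bare shift leaves the low bits zero, so 10-bit white would top out short of 16-bit white. A quick check:

  uint32_t shift_only = 1023u << 6;                  /* 65472 */
  uint32_t replicated = (1023u << 6) | (1023u >> 4); /* 65535, full scale */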
@@ -1747,9 +1800,9 @@ static __inline void YuvPixel10(uint16_t y,
int g16;
int r16;
YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
- *b = Clamp(b16 >> 6);
- *g = Clamp(g16 >> 6);
- *r = Clamp(r16 >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
}
// C reference code that mimics the YUV 12 bit assembly.
@@ -1765,9 +1818,9 @@ static __inline void YuvPixel12(uint16_t y,
int g16;
int r16;
YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
- *b = Clamp(b16 >> 6);
- *g = Clamp(g16 >> 6);
- *r = Clamp(r16 >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
@@ -1781,12 +1834,12 @@ static __inline void YuvPixel16_8(uint16_t y,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y;
- u = clamp255(u >> 8);
- v = clamp255(v >> 8);
+ u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+ v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
- *b = Clamp((int32_t)(b16) >> 6);
- *g = Clamp((int32_t)(g16) >> 6);
- *r = Clamp((int32_t)(r16) >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
// C reference code that mimics the YUV 16 bit assembly.
@@ -1800,8 +1853,8 @@ static __inline void YuvPixel16_16(uint16_t y,
const struct YuvConstants* yuvconstants) {
LOAD_YUV_CONSTANTS;
uint32_t y32 = y;
- u = clamp255(u >> 8);
- v = clamp255(v >> 8);
+ u = STATIC_CAST(uint16_t, clamp255(u >> 8));
+ v = STATIC_CAST(uint16_t, clamp255(v >> 8));
CALC_RGB16;
*b = b16;
*g = g16;
@@ -1815,7 +1868,7 @@ static __inline void YPixel(uint8_t y,
uint8_t* g,
uint8_t* r,
const struct YuvConstants* yuvconstants) {
-#if defined(__aarch64__) || defined(__arm__)
+#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
int yg = yuvconstants->kRGBCoeffBias[0];
int ygb = yuvconstants->kRGBCoeffBias[4];
#else
@@ -1823,9 +1876,9 @@ static __inline void YPixel(uint8_t y,
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
- *b = Clamp(((int32_t)(y1) + ygb) >> 6);
- *g = Clamp(((int32_t)(y1) + ygb) >> 6);
- *r = Clamp(((int32_t)(y1) + ygb) >> 6);
+ *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
}
void I444ToARGBRow_C(const uint8_t* src_y,
@@ -1846,6 +1899,23 @@ void I444ToARGBRow_C(const uint8_t* src_y,
}
}
+void I444ToRGB24Row_C(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* rgb_buf,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
+ rgb_buf + 2, yuvconstants);
+ src_y += 1;
+ src_u += 1;
+ src_v += 1;
+ rgb_buf += 3; // Advance 1 pixel.
+ }
+}
+
// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
const uint8_t* src_u,
@@ -1929,10 +1999,10 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
- rgb_buf[7] = clamp255(src_a[1] >> 2);
+ rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1942,7 +2012,7 @@ void I210AlphaToARGBRow_C(const uint16_t* src_y,
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
}
}
@@ -1957,7 +2027,7 @@ void I410AlphaToARGBRow_C(const uint16_t* src_y,
for (x = 0; x < width; ++x) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
- rgb_buf[3] = clamp255(src_a[0] >> 2);
+ rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
src_y += 1;
src_u += 1;
src_v += 1;
@@ -2283,8 +2353,10 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
b1 = b1 >> 4;
g1 = g1 >> 4;
r1 = r1 >> 4;
- *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
- *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
+ *(uint16_t*)(dst_argb4444 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
+ *(uint16_t*)(dst_argb4444 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2295,7 +2367,8 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
- *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
+ *(uint16_t*)(dst_argb4444) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
}
}
@@ -2321,8 +2394,10 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 3;
r1 = r1 >> 3;
- *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
- *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
+ *(uint16_t*)(dst_argb1555 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
+ *(uint16_t*)(dst_argb1555 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2333,7 +2408,8 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
- *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
+ *(uint16_t*)(dst_argb1555) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
}
}
@@ -2359,8 +2435,10 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11); // for ubsan
- *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
+ *(uint16_t*)(dst_rgb565 + 2) =
+ STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
src_y += 2;
src_u += 1;
src_v += 1;
@@ -2371,7 +2449,8 @@ void I422ToRGB565Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) =
+ STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
}
}
@@ -2486,8 +2565,12 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
- *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
- *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
+ *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
+ STATIC_CAST(uint16_t, g0 << 5) |
+ STATIC_CAST(uint16_t, r0 << 11);
+ *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
+ STATIC_CAST(uint16_t, g1 << 5) |
+ STATIC_CAST(uint16_t, r1 << 11);
src_y += 2;
src_uv += 2;
dst_rgb565 += 4; // Advance 2 pixels.
@@ -2497,7 +2580,9 @@ void NV12ToRGB565Row_C(const uint8_t* src_y,
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
- *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+ *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
+ STATIC_CAST(uint16_t, g0 << 5) |
+ STATIC_CAST(uint16_t, r0 << 11);
}
}
@@ -2603,6 +2688,19 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
}
}
+void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
+ int x;
+ src += width - 1;
+ for (x = 0; x < width - 1; x += 2) {
+ dst[x] = src[0];
+ dst[x + 1] = src[-1];
+ src -= 2;
+ }
+ if (width & 1) {
+ dst[width - 1] = src[0];
+ }
+}
+
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
int x;
src_uv += (width - 1) << 1;
@@ -2714,6 +2812,21 @@ void DetileRow_C(const uint8_t* src,
}
}
+void DetileRow_16_C(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ int x;
+ for (x = 0; x < width - 15; x += 16) {
+ memcpy(dst, src, 16 * sizeof(uint16_t));
+ dst += 16;
+ src += src_tile_stride;
+ }
+ if (width & 15) {
+ memcpy(dst, src, (width & 15) * sizeof(uint16_t));
+ }
+}
+
void DetileSplitUVRow_C(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
uint8_t* dst_u,
@@ -2731,6 +2844,51 @@ void DetileSplitUVRow_C(const uint8_t* src_uv,
}
}
+void DetileToYUY2_C(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ for (int x = 0; x < width - 15; x += 16) {
+ for (int i = 0; i < 8; i++) {
+ dst_yuy2[0] = src_y[0];
+ dst_yuy2[1] = src_uv[0];
+ dst_yuy2[2] = src_y[1];
+ dst_yuy2[3] = src_uv[1];
+ dst_yuy2 += 4;
+ src_y += 2;
+ src_uv += 2;
+ }
+ src_y += src_y_tile_stride - 16;
+ src_uv += src_uv_tile_stride - 16;
+ }
+}
+
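Each outer iteration of DetileToYUY2_C consumes one 16-byte tile row of Y and one of UV and emits 32 bytes of YUY2 (Y0 U Y1 V per pixel pair); the inner loop has already advanced both sources by 16, so adding tile_stride - 16 lands on the first row of the next tile. A call sketch, with illustrative tile geometry (for example an MM21-style 16x32 Y / 16x16 UV layout; nothing here mandates those numbers):

  /* One output row from tiled Y and UV planes (strides are assumptions). */
  DetileToYUY2_C(src_y, 16 * 32, src_uv, 16 * 16, dst_yuy2, width);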
+// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
+// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
+// block contain all of the lower 2 bits of each pixel packed together, and the
+// next 64 bytes represent all the upper 8 bits of each pixel. The lower bits are
+// packed into 1x4 blocks, whereas the upper bits are packed in normal raster
+// order.
+void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
+ for (size_t i = 0; i < size; i += 80) {
+ const uint8_t* src_lower_bits = src;
+ const uint8_t* src_upper_bits = src + 16;
+
+ for (int j = 0; j < 4; j++) {
+ for (int k = 0; k < 16; k++) {
+ *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 |
+ (uint16_t)*src_upper_bits << 8 |
+ (uint16_t)*src_upper_bits >> 2;
+ src_upper_bits++;
+ }
+ }
+
+ src += 80;
+ }
+}
+
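The expression in the inner loop is 10-to-16-bit replication in disguise: with v10 = (upper << 2) | lower2, the three ORed terms equal (v10 << 6) | (v10 >> 4), since lower2 occupies bits 1:0 of v10 and vanishes from the >> 4 term. An equivalent spelling, where upper and lower2 name the per-pixel pieces from the loop:

  uint16_t lower2 = (src_lower_bits[k] >> (j * 2)) & 0x3; /* bits 1:0 */
  uint16_t upper = src_upper_bits[0];                     /* bits 9:2 */
  uint16_t v10 = (uint16_t)((upper << 2) | lower2);       /* 0..1023  */
  uint16_t v16 = (uint16_t)((v10 << 6) | (v10 >> 4));     /* as above */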
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -2823,10 +2981,10 @@ void MergeAR64Row_C(const uint16_t* src_r,
int shift = 16 - depth;
int max = (1 << depth) - 1;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = ClampMax(src_b[x], max) << shift;
- dst_ar64[1] = ClampMax(src_g[x], max) << shift;
- dst_ar64[2] = ClampMax(src_r[x], max) << shift;
- dst_ar64[3] = ClampMax(src_a[x], max) << shift;
+ dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+ dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+ dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
+ dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
dst_ar64 += 4;
}
}
@@ -2843,10 +3001,10 @@ void MergeARGB16To8Row_C(const uint16_t* src_r,
int x;
int shift = depth - 8;
for (x = 0; x < width; ++x) {
- dst_argb[0] = clamp255(src_b[x] >> shift);
- dst_argb[1] = clamp255(src_g[x] >> shift);
- dst_argb[2] = clamp255(src_r[x] >> shift);
- dst_argb[3] = clamp255(src_a[x] >> shift);
+ dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
+ dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
dst_argb += 4;
}
}
@@ -2863,9 +3021,9 @@ void MergeXR64Row_C(const uint16_t* src_r,
int shift = 16 - depth;
int max = (1 << depth) - 1;
for (x = 0; x < width; ++x) {
- dst_ar64[0] = ClampMax(src_b[x], max) << shift;
- dst_ar64[1] = ClampMax(src_g[x], max) << shift;
- dst_ar64[2] = ClampMax(src_r[x], max) << shift;
+ dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
+ dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
+ dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
dst_ar64[3] = 0xffff;
dst_ar64 += 4;
}
@@ -2882,9 +3040,9 @@ void MergeXRGB16To8Row_C(const uint16_t* src_r,
int x;
int shift = depth - 8;
for (x = 0; x < width; ++x) {
- dst_argb[0] = clamp255(src_b[x] >> shift);
- dst_argb[1] = clamp255(src_g[x] >> shift);
- dst_argb[2] = clamp255(src_r[x] >> shift);
+ dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
+ dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
+ dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
dst_argb[3] = 0xff;
dst_argb += 4;
}
@@ -2930,8 +3088,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
assert(depth <= 16);
int x;
for (x = 0; x < width; ++x) {
- dst_uv[0] = src_u[x] << shift;
- dst_uv[1] = src_v[x] << shift;
+ dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
+ dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
dst_uv += 2;
}
}
@@ -2959,7 +3117,7 @@ void MultiplyRow_16_C(const uint16_t* src_y,
int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_y[x] = src_y[x] * scale;
+ dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
}
}
@@ -2990,7 +3148,7 @@ void Convert16To8Row_C(const uint16_t* src_y,
assert(scale <= 32768);
for (x = 0; x < width; ++x) {
- dst_y[x] = C16TO8(src_y[x], scale);
+ dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
}
}
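For reference, the C16TO8 macro used here computes clamp255((v * scale) >> 16); a scalar sketch assuming that definition (Clamp255 and Convert16To8 are hypothetical names):

#include <stdint.h>

static inline uint8_t Clamp255(int32_t v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// dst = (src * scale) >> 16, clamped to [0, 255]. A scale of 16384 maps
// 10-bit input to 8 bits; 256 maps 16-bit input to 8 bits.
static inline uint8_t Convert16To8(uint16_t v, int scale) {
  return Clamp255((int32_t)(((uint32_t)v * (uint32_t)scale) >> 16));
}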
@@ -3043,6 +3201,21 @@ void YUY2ToUVRow_C(const uint8_t* src_yuy2,
}
}
+// Filter 2 rows of YUY2 UV's (422) into UV (NV12).
+void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ // Output a row of UV values, filtering 2 rows of YUY2.
+ int x;
+ for (x = 0; x < width; x += 2) {
+ dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
+ dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
+ src_yuy2 += 4;
+ dst_uv += 2;
+ }
+}
+
// Copy row of YUY2 UV's (422) into U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
uint8_t* dst_u,
@@ -3138,9 +3311,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[3] = 255u;
fb = src_argb[4 + 0];
@@ -3150,9 +3323,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
bb = src_argb1[4 + 0];
bg = src_argb1[4 + 1];
br = src_argb1[4 + 2];
- dst_argb[4 + 0] = BLEND(fb, bb, a);
- dst_argb[4 + 1] = BLEND(fg, bg, a);
- dst_argb[4 + 2] = BLEND(fr, br, a);
+ dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[4 + 3] = 255u;
src_argb += 8;
src_argb1 += 8;
@@ -3167,9 +3340,9 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
uint32_t bb = src_argb1[0];
uint32_t bg = src_argb1[1];
uint32_t br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
+ dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
+ dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
+ dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
dst_argb[3] = 255u;
}
}
@@ -3214,7 +3387,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
b = src_argb[4];
g = src_argb[5];
r = src_argb[6];
@@ -3222,7 +3395,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[4] = ATTENUATE(b, a);
dst_argb[5] = ATTENUATE(g, a);
dst_argb[6] = ATTENUATE(r, a);
- dst_argb[7] = a;
+ dst_argb[7] = STATIC_CAST(uint8_t, a);
src_argb += 8;
dst_argb += 8;
}
@@ -3235,7 +3408,7 @@ void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
dst_argb[0] = ATTENUATE(b, a);
dst_argb[1] = ATTENUATE(g, a);
dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
}
}
#undef ATTENUATE
@@ -3307,10 +3480,10 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
// Clamping should not be necessary but is free in assembly.
- dst_argb[0] = UNATTENUATE(b, ia);
- dst_argb[1] = UNATTENUATE(g, ia);
- dst_argb[2] = UNATTENUATE(r, ia);
- dst_argb[3] = a;
+ dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
+ dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
+ dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
+ dst_argb[3] = STATIC_CAST(uint8_t, a);
src_argb += 4;
dst_argb += 4;
}
@@ -3344,12 +3517,20 @@ void CumulativeSumToAverageRow_C(const int32_t* tl,
int i;
assert(area != 0);
- ooa = 1.0f / area;
+ ooa = 1.0f / STATIC_CAST(float, area);
for (i = 0; i < count; ++i) {
- dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
- dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
- dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
- dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+ dst[0] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) *
+ ooa);
+ dst[1] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) *
+ ooa);
+ dst[2] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) *
+ ooa);
+ dst[3] =
+ (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) *
+ ooa);
dst += 4;
tl += 4;
bl += 4;
@@ -3407,7 +3588,9 @@ static void HalfRow_16To8_C(const uint16_t* src_uv,
int width) {
int x;
for (x = 0; x < width; ++x) {
- dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale);
+ dst_uv[x] = STATIC_CAST(
+ uint8_t,
+ C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
}
}
@@ -3433,8 +3616,9 @@ void InterpolateRow_C(uint8_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] =
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[0] = STATIC_CAST(
+ uint8_t,
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
++src_ptr;
++src_ptr1;
++dst_ptr;
@@ -3463,8 +3647,9 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] =
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+ dst_ptr[0] = STATIC_CAST(
+ uint16_t,
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
++src_ptr;
++src_ptr1;
++dst_ptr;
@@ -3501,9 +3686,11 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
return;
}
for (x = 0; x < width; ++x) {
- dst_ptr[0] = C16TO8(
- (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
- scale);
+ dst_ptr[0] = STATIC_CAST(
+ uint8_t,
+ C16TO8(
+ (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
+ scale));
src_ptr += 1;
src_ptr1 += 1;
dst_ptr += 1;
@@ -3615,10 +3802,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb,
dr += poly[14] * r3;
da += poly[15] * a3;
- dst_argb[0] = Clamp((int32_t)(db));
- dst_argb[1] = Clamp((int32_t)(dg));
- dst_argb[2] = Clamp((int32_t)(dr));
- dst_argb[3] = Clamp((int32_t)(da));
+ dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
+ dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
+ dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
+ dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
src_argb += 4;
dst_argb += 4;
}
@@ -4023,6 +4210,32 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
+#if defined(HAS_I444TORGB24ROW_AVX2)
+void I444ToRGB24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+#if defined(HAS_ARGBTORGB24ROW_AVX2)
+ ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
+#else
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+#endif
+ src_y += twidth;
+ src_u += twidth;
+ src_v += twidth;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
@@ -4164,8 +4377,9 @@ void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
int i;
for (i = 0; i < width; ++i) {
- *dst++ =
- (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
+ *dst++ = STATIC_CAST(
+ uint16_t,
+ (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
++src;
}
}
@@ -4325,6 +4539,8 @@ void HalfMergeUVRow_C(const uint8_t* src_u,
}
}
+#undef STATIC_CAST
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_gcc.cc b/files/source/row_gcc.cc
index dce8c439..e94fd04d 100644
--- a/files/source/row_gcc.cc
+++ b/files/source/row_gcc.cc
@@ -27,6 +27,9 @@ static const uvec8 kARGBToY = {25u, 129u, 66u, 0u, 25u, 129u, 66u, 0u,
static const uvec8 kARGBToYJ = {29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u,
29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u};
+static const uvec8 kABGRToYJ = {77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u,
+ 77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u};
+
static const uvec8 kRGBAToYJ = {0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u,
0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u};
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
@@ -39,12 +42,18 @@ static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0,
127, -84, -43, 0, 127, -84, -43, 0};
+static const vec8 kABGRToUJ = {-43, -84, 127, 0, -43, -84, 127, 0,
+ -43, -84, 127, 0, -43, -84, 127, 0};
+
static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0,
-18, -94, 112, 0, -18, -94, 112, 0};
static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0,
-20, -107, 127, 0, -20, -107, 127, 0};
+static const vec8 kABGRToVJ = {127, -107, -20, 0, 127, -107, -20, 0,
+ 127, -107, -20, 0, 127, -107, -20, 0};
+
// Constants for BGRA
static const uvec8 kBGRAToY = {0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u,
0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u};
@@ -729,7 +738,7 @@ void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) {
void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"movd %3,%%xmm6 \n"
@@ -777,7 +786,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
uint8_t* dst,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"vbroadcastss %3,%%xmm6 \n"
@@ -1201,6 +1210,7 @@ void ARGBToAR64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
@@ -1228,6 +1238,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
@@ -1256,6 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -1284,6 +1296,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@@ -1398,6 +1411,24 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_SSSE3
+#ifdef HAS_ABGRTOYJROW_SSSE3
+// Convert 16 ABGR pixels (64 bytes) to 16 YJ values.
+// Same as ABGRToYRow but different coefficients, no add 16.
+void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "movdqa %3,%%xmm4 \n"
+ "movdqa %4,%%xmm5 \n"
+
+ LABELALIGN RGBTOY(xmm5)
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToYJ), // %3
+ "m"(kSub128) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif // HAS_ABGRTOYJROW_SSSE3
+
#ifdef HAS_RGBATOYJROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16.
@@ -1416,7 +1447,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
}
#endif // HAS_RGBATOYJROW_SSSE3
-#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ABGRTOYROW_AVX2) || \
+ defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
// vpermd for vphaddw + vpackuswb vpermd.
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
#endif
@@ -1429,9 +1461,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
- "vmovdqu %6,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm7)
+ "vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
+ ymm7) "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1451,9 +1482,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
- "vmovdqu %6,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm7)
+ "vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
+ ymm7) "vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1472,9 +1502,8 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(ymm5)
+ "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
+ ymm5) "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@@ -1486,15 +1515,32 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_AVX2
+#ifdef HAS_ABGRTOYJROW_AVX2
+// Convert 32 ABGR pixels (128 bytes) to 32 YJ values.
+void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm4 \n"
+ "vbroadcastf128 %4,%%ymm5 \n"
+ "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
+ ymm5) "vzeroupper \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ : "m"(kABGRToYJ), // %3
+ "m"(kSub128), // %4
+ "m"(kPermdARGBToY_AVX) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ABGRTOYJROW_AVX2
+
#ifdef HAS_RGBATOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
- "vmovdqu %5,%%ymm6 \n"
-
- LABELALIGN RGBTOY_AVX2(
+ "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm5) "vzeroupper \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
@@ -1571,11 +1617,15 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVROW_SSSE3
-#ifdef HAS_ARGBTOUVROW_AVX2
+#if defined(HAS_ARGBTOUVROW_AVX2) || defined(HAS_ABGRTOUVROW_AVX2) || \
+ defined(HAS_ARGBTOUVJROW_AVX2) || defined(HAS_ABGRTOUVJROW_AVX2)
// vpshufb for vphaddw + vpackuswb packed to shorts.
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
+#endif
+
+#if defined(HAS_ARGBTOUVROW_AVX2)
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1765,6 +1815,71 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_AVX2
+// TODO(fbarchard): Pass kABGRToVJ / kABGRToUJ as matrix
+#ifdef HAS_ABGRTOUVJROW_AVX2
+void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "vbroadcastf128 %5,%%ymm5 \n"
+ "vbroadcastf128 %6,%%ymm6 \n"
+ "vbroadcastf128 %7,%%ymm7 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vmovdqu 0x40(%0),%%ymm2 \n"
+ "vmovdqu 0x60(%0),%%ymm3 \n"
+ "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
+ "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
+ "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
+ "lea 0x80(%0),%0 \n"
+ "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
+ "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
+ "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
+ "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
+
+ "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
+ "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
+ "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
+ "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
+ "vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsraw $0x8,%%ymm0,%%ymm0 \n"
+ "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vpshufb %8,%%ymm0,%%ymm0 \n"
+
+ "vextractf128 $0x0,%%ymm0,(%1) \n"
+ "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kSub128), // %5
+ "m"(kABGRToVJ), // %6
+ "m"(kABGRToUJ), // %7
+ "m"(kShufARGBToUV_AVX) // %8
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
+}
+#endif // HAS_ABGRTOUVJROW_AVX2
+
#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
@@ -1831,6 +1946,72 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_SSSE3
+#ifdef HAS_ABGRTOUVJROW_SSSE3
+void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ asm volatile(
+ "movdqa %5,%%xmm3 \n"
+ "movdqa %6,%%xmm4 \n"
+ "movdqa %7,%%xmm5 \n"
+ "sub %1,%2 \n"
+
+ LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x00(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x10(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm1 \n"
+ "movdqu 0x20(%0),%%xmm2 \n"
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqu 0x30(%0),%%xmm6 \n"
+ "movdqu 0x30(%0,%4,1),%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm6 \n"
+
+ "lea 0x40(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm7 \n"
+ "shufps $0x88,%%xmm1,%%xmm0 \n"
+ "shufps $0xdd,%%xmm1,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm0 \n"
+ "movdqa %%xmm2,%%xmm7 \n"
+ "shufps $0x88,%%xmm6,%%xmm2 \n"
+ "shufps $0xdd,%%xmm6,%%xmm7 \n"
+ "pavgb %%xmm7,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm2,%%xmm6 \n"
+ "pmaddubsw %%xmm4,%%xmm0 \n"
+ "pmaddubsw %%xmm4,%%xmm2 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm6 \n"
+ "phaddw %%xmm2,%%xmm0 \n"
+ "phaddw %%xmm6,%%xmm1 \n"
+ "paddw %%xmm5,%%xmm0 \n"
+ "paddw %%xmm5,%%xmm1 \n"
+ "psraw $0x8,%%xmm0 \n"
+ "psraw $0x8,%%xmm1 \n"
+ "packsswb %%xmm1,%%xmm0 \n"
+ "movlps %%xmm0,(%1) \n"
+ "movhps %%xmm0,0x00(%1,%2,1) \n"
+ "lea 0x8(%1),%1 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+rm"(width) // %3
+ : "r"((intptr_t)(src_stride_abgr)), // %4
+ "m"(kABGRToVJ), // %5
+ "m"(kABGRToUJ), // %6
+ "m"(kSub128) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
+}
+#endif // HAS_ABGRTOUVJROW_SSSE3
+
#ifdef HAS_ARGBTOUV444ROW_SSSE3
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
@@ -2153,9 +2334,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422 10 bit, upsample to 8 UV
-// TODO(fbarchard): Consider shufb to replace pack/unpack
-// TODO(fbarchard): Consider pmulhuw to replace psraw
-// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
#define READYUV210 \
"movq (%[u_buf]),%%xmm3 \n" \
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
@@ -2165,7 +2343,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
#define READYUVA210 \
@@ -2177,7 +2358,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
@@ -2196,7 +2380,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 444 10 bit. With 8 Alpha.
@@ -2211,7 +2398,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"punpckhwd %%xmm2,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
- "psllw $0x6,%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
+ "psllw $6,%%xmm4 \n" \
+ "psrlw $4,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
"psraw $2,%%xmm5 \n" \
@@ -2228,7 +2418,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"packuswb %%xmm3,%%xmm3 \n" \
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
- "psllw $0x4,%%xmm4 \n" \
+ "movdqa %%xmm4,%%xmm2 \n" \
+ "psllw $4,%%xmm4 \n" \
+ "psrlw $8,%%xmm2 \n" \
+ "paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
@@ -2399,6 +2592,20 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu %%xmm0,0x10(%[dst_rgba]) \n" \
"lea 0x20(%[dst_rgba]),%[dst_rgba] \n"
+// Store 8 RGB24 values.
+#define STORERGB24 \
+ "punpcklbw %%xmm1,%%xmm0 \n" \
+ "punpcklbw %%xmm2,%%xmm2 \n" \
+ "movdqa %%xmm0,%%xmm1 \n" \
+ "punpcklwd %%xmm2,%%xmm0 \n" \
+ "punpckhwd %%xmm2,%%xmm1 \n" \
+ "pshufb %%xmm5,%%xmm0 \n" \
+ "pshufb %%xmm6,%%xmm1 \n" \
+ "palignr $0xc,%%xmm0,%%xmm1 \n" \
+ "movq %%xmm0,(%[dst_rgb24]) \n" \
+ "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" \
+ "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
+
// Store 8 AR30 values.
#define STOREAR30 \
"psraw $0x4,%%xmm0 \n" \
@@ -2508,17 +2715,43 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
"1: \n"
READYUV422
YUVTORGB(yuvconstants)
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0,(%[dst_rgb24]) \n"
- "movdqu %%xmm1,0x8(%[dst_rgb24]) \n"
- "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n"
+ STORERGB24
+ "subl $0x8,%[width] \n"
+ "jg 1b \n"
+ : [y_buf]"+r"(y_buf), // %[y_buf]
+ [u_buf]"+r"(u_buf), // %[u_buf]
+ [v_buf]"+r"(v_buf), // %[v_buf]
+ [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
+#if defined(__i386__)
+ [width]"+m"(width) // %[width]
+#else
+ [width]"+rm"(width) // %[width]
+#endif
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
+ [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+ [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
+ : "memory", "cc", YUVTORGB_REGS
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+ );
+}
+
+void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile (
+ YUVTORGB_SETUP(yuvconstants)
+ "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
+ "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
+ "sub %[u_buf],%[v_buf] \n"
+
+ LABELALIGN
+ "1: \n"
+ READYUV444
+ YUVTORGB(yuvconstants)
+ STORERGB24
"subl $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
@@ -3209,7 +3442,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 210, upsample to 16 UV. With 16 Alpha.
@@ -3224,7 +3459,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
@@ -3242,7 +3479,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 8 UV from 212 12 bit, upsample to 16 UV
@@ -3257,7 +3496,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $0x4,%%ymm4,%%ymm4 \n" \
+ "vpsllw $4,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $8,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
// Read 16 UV from 410. With 16 Alpha.
@@ -3271,7 +3512,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
- "vpsllw $6,%%ymm4,%%ymm4 \n" \
+ "vpsllw $6,%%ymm4,%%ymm2 \n" \
+ "vpsrlw $4,%%ymm4,%%ymm4 \n" \
+ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
"vpsraw $2,%%ymm5,%%ymm5 \n" \
@@ -4785,6 +5028,84 @@ void DetileRow_SSE2(const uint8_t* src,
}
#endif // HAS_DETILEROW_SSE2
+#ifdef HAS_DETILEROW_16_SSE2
+void DetileRow_16_SSE2(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "lea (%0,%3,2),%0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "movdqu %%xmm1,0x10(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride) // %3
+ : "cc", "memory", "xmm0", "xmm1");
+}
+#endif  // HAS_DETILEROW_16_SSE2
+
+#ifdef HAS_DETILEROW_16_AVX
+void DetileRow_16_AVX(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "lea (%0,%3,2),%0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride) // %3
+ : "cc", "memory", "xmm0");
+}
+#endif  // HAS_DETILEROW_16_AVX
+
+#ifdef HAS_DETILETOYUY2_SSE2
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_SSE2(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n" // Load 16 Y
+ "sub $0x10,%3 \n"
+ "lea (%0,%4),%0 \n"
+ "movdqu (%1),%%xmm1 \n" // Load 8 UV
+ "lea (%1,%5),%1 \n"
+ "movdqu %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm2 \n"
+ "movdqu %%xmm0,(%2) \n"
+ "movdqu %%xmm2,0x10(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "jg 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "xmm0", "xmm1", "xmm2" // Clobber list
+ );
+}
+#endif
+
#ifdef HAS_DETILESPLITUVROW_SSSE3
// TODO(greenjustin): Look into generating these constants instead of loading
// them since this can cause branch mispredicts for fPIC code on 32-bit
@@ -4821,36 +5142,59 @@ void DetileSplitUVRow_SSSE3(const uint8_t* src_uv,
}
#endif // HAS_DETILESPLITUVROW_SSSE3
+#ifdef HAS_MERGEUVROW_AVX512BW
+void MergeUVRow_AVX512BW(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile("sub %0,%1 \n"
+
+ LABELALIGN
+ "1: \n"
+ "vpmovzxbw (%0),%%zmm0 \n"
+ "vpmovzxbw 0x00(%0,%1,1),%%zmm1 \n"
+ "lea 0x20(%0),%0 \n"
+ "vpsllw $0x8,%%zmm1,%%zmm1 \n"
+ "vporq %%zmm0,%%zmm1,%%zmm2 \n"
+ "vmovdqu64 %%zmm2,(%2) \n"
+ "lea 0x40(%2),%2 \n"
+ "sub $0x20,%3 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
+}
+#endif // HAS_MERGEUVROW_AVX512BW
+
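The zero-extend/shift/or sequence above replaces byte unpacking: in scalar terms each output 16-bit lane is V in the high byte ORed with U in the low byte, which stores as U,V in memory on little-endian (a sketch; MergeUVScalar is a hypothetical name):

#include <stdint.h>

// Interleave one U and one V byte per pixel into a 16-bit UV lane,
// mirroring vpmovzxbw + vpsllw $8 + vporq in MergeUVRow_AVX512BW.
static void MergeUVScalar(const uint8_t* u, const uint8_t* v,
                          uint16_t* dst_uv, int width) {
  for (int i = 0; i < width; ++i) {
    dst_uv[i] = (uint16_t)(((uint16_t)v[i] << 8) | u[i]);
  }
}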
#ifdef HAS_MERGEUVROW_AVX2
void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width) {
- asm volatile(
+ asm volatile("sub %0,%1 \n"
- "sub %0,%1 \n"
-
- LABELALIGN
+ LABELALIGN
"1: \n"
- "vmovdqu (%0),%%ymm0 \n"
- "vmovdqu 0x00(%0,%1,1),%%ymm1 \n"
- "lea 0x20(%0),%0 \n"
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n"
- "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm2,(%2) \n"
- "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
- "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
- "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
- "lea 0x40(%2),%2 \n"
- "sub $0x20,%3 \n"
+ "vpmovzxbw (%0),%%ymm0 \n"
+ "vpmovzxbw 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "vpsllw $0x8,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm0,%%ymm1,%%ymm2 \n"
+ "vmovdqu %%ymm2,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x10,%3 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_AVX2
@@ -4859,11 +5203,9 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
int width) {
- asm volatile(
+ asm volatile("sub %0,%1 \n"
- "sub %0,%1 \n"
-
- LABELALIGN
+ LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x00(%0,%1,1),%%xmm1 \n"
@@ -4876,12 +5218,12 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
"lea 0x20(%2),%2 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif // HAS_MERGEUVROW_SSE2
@@ -4891,37 +5233,35 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
uint16_t* dst_uv,
int depth,
int width) {
- depth = 16 - depth;
// clang-format off
asm volatile (
"vmovd %4,%%xmm3 \n"
+ "vmovd %5,%%xmm4 \n"
+
"sub %0,%1 \n"
+ // 8 pixels per loop.
- // 16 pixels per loop.
- LABELALIGN
+ LABELALIGN
"1: \n"
- "vmovdqu (%0),%%ymm0 \n"
- "vmovdqu (%0,%1,1),%%ymm1 \n"
- "add $0x20,%0 \n"
-
+ "vpmovzxwd (%0),%%ymm0 \n"
+ "vpmovzxwd 0x00(%0,%1,1),%%ymm1 \n"
+ "lea 0x10(%0),%0 \n"
"vpsllw %%xmm3,%%ymm0,%%ymm0 \n"
- "vpsllw %%xmm3,%%ymm1,%%ymm1 \n"
- "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates
- "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm2,(%2) \n"
- "vextractf128 $0x0,%%ymm0,0x10(%2) \n"
- "vextractf128 $0x1,%%ymm2,0x20(%2) \n"
- "vextractf128 $0x1,%%ymm0,0x30(%2) \n"
- "add $0x40,%2 \n"
- "sub $0x10,%3 \n"
+ "vpslld %%xmm4,%%ymm1,%%ymm1 \n"
+ "vpor %%ymm0,%%ymm1,%%ymm2 \n"
+ "vmovdqu %%ymm2,(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x8,%3 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- : "r"(depth) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ : "r"(16 - depth), // %4
+ "r"(32 - depth) // %5
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
// clang-format on
}
#endif // HAS_MERGEUVROW_AVX2
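The rewrite pre-shifts within 32-bit lanes: U words are shifted left by 16 - depth, V dwords by 32 - depth, and the OR leaves U in the low half-word and V in the high half-word. The scalar equivalent matches MergeUVRow_16_C earlier in this change:

#include <stdint.h>

// Pack U and V `depth`-bit samples into interleaved 16-bit UV, each
// left-justified to 16 bits. A hypothetical scalar model of the AVX2 loop.
static void MergeUV16Scalar(const uint16_t* src_u, const uint16_t* src_v,
                            uint16_t* dst_uv, int depth, int width) {
  int shift = 16 - depth;
  for (int i = 0; i < width; ++i) {
    dst_uv[0] = (uint16_t)(src_u[i] << shift);
    dst_uv[1] = (uint16_t)(src_v[i] << shift);
    dst_uv += 2;
  }
}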
@@ -5127,7 +5467,6 @@ void Convert16To8Row_AVX2(const uint16_t* src_y,
// 512 = 9 bits
// 1024 = 10 bits
// 4096 = 12 bits
-// TODO(fbarchard): reduce to SSE2
void Convert8To16Row_SSE2(const uint8_t* src_y,
uint16_t* dst_y,
int scale,
@@ -6178,6 +6517,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
"lea 0x40(%1),%1 \n"
"sub $0x40,%2 \n"
"jg 1b \n"
+ "vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
@@ -6461,6 +6801,33 @@ void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
+void YUY2ToNVUVRow_SSE2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(LABELALIGN
+ "1: \n"
+ "movdqu (%0),%%xmm0 \n"
+ "movdqu 0x10(%0),%%xmm1 \n"
+ "movdqu 0x00(%0,%3,1),%%xmm2 \n"
+ "movdqu 0x10(%0,%3,1),%%xmm3 \n"
+ "lea 0x20(%0),%0 \n"
+ "pavgb %%xmm2,%%xmm0 \n"
+ "pavgb %%xmm3,%%xmm1 \n"
+ "psrlw $0x8,%%xmm0 \n"
+ "psrlw $0x8,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
+ "sub $0x10,%2 \n"
+ "jg 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_uv), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(stride_yuy2)) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
+}
+
void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
int stride_yuy2,
uint8_t* dst_u,
@@ -6661,6 +7028,35 @@ void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
+void YUY2ToNVUVRow_AVX2(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+
+ LABELALIGN
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n"
+ "vmovdqu 0x20(%0),%%ymm1 \n"
+ "vpavgb 0x00(%0,%3,1),%%ymm0,%%ymm0 \n"
+ "vpavgb 0x20(%0,%3,1),%%ymm1,%%ymm1 \n"
+ "lea 0x40(%0),%0 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpermq $0xd8,%%ymm0,%%ymm0 \n"
+ "vmovdqu %%ymm0,(%1) \n"
+ "lea 0x20(%1),%1 \n"
+ "sub $0x20,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(dst_uv), // %1
+ "+r"(width) // %2
+ : "r"((intptr_t)(stride_yuy2)) // %3
+ : "memory", "cc", "xmm0", "xmm1");
+}
+
void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
int stride_yuy2,
uint8_t* dst_u,
diff --git a/files/source/row_lasx.cc b/files/source/row_lasx.cc
index 7dd18f40..1082ad80 100644
--- a/files/source/row_lasx.cc
+++ b/files/source/row_lasx.cc
@@ -775,40 +775,6 @@ void UYVYToUV422Row_LASX(const uint8_t* src_uyvy,
}
}
-void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2, src3, vec0, vec1, vec2, vec3;
- __m256i tmp0, tmp1, dst0;
- __m256i const_19 = __lasx_xvldi(0x19);
- __m256i const_42 = __lasx_xvldi(0x42);
- __m256i const_81 = __lasx_xvldi(0x81);
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
- 0x0000000700000003};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64,
- src_argb0, 96, src0, src1, src2, src3);
- vec0 = __lasx_xvpickev_b(src1, src0);
- vec1 = __lasx_xvpickev_b(src3, src2);
- vec2 = __lasx_xvpickod_b(src1, src0);
- vec3 = __lasx_xvpickod_b(src3, src2);
- tmp0 = __lasx_xvmaddwev_h_bu(const_1080, vec0, const_19);
- tmp1 = __lasx_xvmaddwev_h_bu(const_1080, vec1, const_19);
- tmp0 = __lasx_xvmaddwev_h_bu(tmp0, vec2, const_81);
- tmp1 = __lasx_xvmaddwev_h_bu(tmp1, vec3, const_81);
- tmp0 = __lasx_xvmaddwod_h_bu(tmp0, vec0, const_42);
- tmp1 = __lasx_xvmaddwod_h_bu(tmp1, vec1, const_42);
- dst0 = __lasx_xvssrani_b_h(tmp1, tmp0, 8);
- dst0 = __lasx_xvperm_w(dst0, control);
- __lasx_xvst(dst0, dst_y, 0);
- src_argb0 += 128;
- dst_y += 32;
- }
-}
-
void ARGBToUVRow_LASX(const uint8_t* src_argb0,
int src_stride_argb,
uint8_t* dst_u,
@@ -1216,7 +1182,7 @@ void ARGBAttenuateRow_LASX(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
int x;
int len = width / 16;
@@ -1811,48 +1777,6 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
}
}
-void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1, reg2, dst0;
- __m256i const_129 = __lasx_xvldi(129);
- __m256i const_br = {0x4219421942194219, 0x4219421942194219,
- 0x4219421942194219, 0x4219421942194219};
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
- 0x17151412110F0E0C};
- __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
- 0x0F0D0C0A09070604};
- __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
- 0x001600130010000D};
- __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
- 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- reg0 = __lasx_xvld(src_rgb24, 0);
- reg1 = __lasx_xvld(src_rgb24, 32);
- reg2 = __lasx_xvld(src_rgb24, 64);
- src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
- src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
- src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
- tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
- tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
- tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
- tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
- reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_rgb24 += 96;
- }
-}
-
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -1916,48 +1840,6 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
}
}
-void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1, reg2, dst0;
- __m256i const_129 = __lasx_xvldi(129);
- __m256i const_br = {0x1942194219421942, 0x1942194219421942,
- 0x1942194219421942, 0x1942194219421942};
- __m256i const_1080 = {0x1080108010801080, 0x1080108010801080,
- 0x1080108010801080, 0x1080108010801080};
- __m256i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C, 0x0B09080605030200,
- 0x17151412110F0E0C};
- __m256i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604, 0x0301001E1D1B1A18,
- 0x0F0D0C0A09070604};
- __m256i shuff2 = {0x000A000700040001, 0x001600130010000D, 0x000A000700040001,
- 0x001600130010000D};
- __m256i shuff3 = {0x0002001F001C0019, 0x000E000B00080005, 0x0002001F001C0019,
- 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- reg0 = __lasx_xvld(src_raw, 0);
- reg1 = __lasx_xvld(src_raw, 32);
- reg2 = __lasx_xvld(src_raw, 64);
- src0 = __lasx_xvpermi_q(reg1, reg0, 0x30);
- src1 = __lasx_xvpermi_q(reg2, reg0, 0x21);
- src2 = __lasx_xvpermi_q(reg2, reg1, 0x30);
- tmp0 = __lasx_xvshuf_b(src1, src0, shuff0);
- tmp1 = __lasx_xvshuf_b(src1, src2, shuff1);
- tmp2 = __lasx_xvshuf_b(src1, src0, shuff2);
- tmp3 = __lasx_xvshuf_b(src1, src2, shuff3);
- reg0 = __lasx_xvmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lasx_xvmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_raw += 96;
- }
-}
-
void RAWToUVRow_LASX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@@ -2118,36 +2000,228 @@ void NV21ToARGBRow_LASX(const uint8_t* src_y,
}
}
-void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- int x;
- int len = width / 32;
- __m256i src0, src1, src2, src3, dst0;
- __m256i tmp0, tmp1, tmp2, tmp3;
- __m256i reg0, reg1;
- __m256i const_128 = __lasx_xvldi(0x480);
- __m256i const_150 = __lasx_xvldi(0x96);
- __m256i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D,
- 0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
- __m256i shuff = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002,
- 0x0000000700000003};
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
+
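Numerically, the matrix rows below compute an 8.8 fixed-point dot product; e.g. with kRgb24I601Constants, Y = (25*B + 129*G + 66*R + 0x1080) >> 8, where 0x1080 folds in the +16 offset (16 << 8) and +0.5 rounding (128). A scalar sketch (RGBToYScalar is a hypothetical name):

#include <stdint.h>

// One luma sample from the coefficient table above; the vector code picks
// the odd bytes of the 16-bit accumulators, which is the >> 8 here.
static uint8_t RGBToYScalar(uint8_t b, uint8_t g, uint8_t r,
                            const struct RgbConstants* c) {
  uint32_t y = (uint32_t)c->kRGBToY[0] * b + (uint32_t)c->kRGBToY[1] * g +
               (uint32_t)c->kRGBToY[2] * r + c->kAddY;
  return (uint8_t)(y >> 8);
}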
+// ARGB expects the first 3 values to contain RGB; the 4th value is ignored.
+static void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr20, %4, 0 \n\t" // load shuff
+ "1: \n\t"
+ "xvld $xr4, %0, 0 \n\t"
+ "xvld $xr5, %0, 32 \n\t"
+ "xvld $xr6, %0, 64 \n\t"
+ "xvld $xr7, %0, 96 \n\t" // load 32 pixels of
+ // ARGB
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpickev.b $xr8, $xr5, $xr4 \n\t" // BR
+ "xvpickev.b $xr10, $xr7, $xr6 \n\t"
+ "xvpickod.b $xr9, $xr5, $xr4 \n\t" // GA
+ "xvpickod.b $xr11, $xr7, $xr6 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr8, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr10, $xr0 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr9, $xr1 \n\t" // G
+ "xvmaddwev.h.bu $xr13, $xr11, $xr1 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr8, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr10, $xr2 \n\t"
+ "addi.d %0, %0, 128 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvperm.w $xr11, $xr10, $xr20 \n\t"
+ "xvst $xr11, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_argb), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), "r"(shuff)
+ : "memory");
+}
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lasx_xvld, src_argb, 0, src_argb, 32, src_argb, 64, src_argb,
- 96, src0, src1, src2, src3);
- tmp0 = __lasx_xvpickev_b(src1, src0);
- tmp1 = __lasx_xvpickod_b(src1, src0);
- tmp2 = __lasx_xvpickev_b(src3, src2);
- tmp3 = __lasx_xvpickod_b(src3, src2);
- reg0 = __lasx_xvmaddwev_h_bu(const_128, tmp1, const_150);
- reg1 = __lasx_xvmaddwev_h_bu(const_128, tmp3, const_150);
- reg0 = __lasx_xvdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lasx_xvdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lasx_xvpickod_b(reg1, reg0);
- dst0 = __lasx_xvperm_w(dst0, shuff);
- __lasx_xvst(dst0, dst_y, 0);
- dst_y += 32;
- src_argb += 128;
- }
+void ARGBToYRow_LASX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LASX(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+
+void ARGBToYJRow_LASX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LASX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void ABGRToYRow_LASX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LASX(src_abgr, dst_y, width, &kRawI601Constants);
+}
+
+void ABGRToYJRow_LASX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LASX(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
+// RGBA expects the first value to be A (ignored), then 3 values containing
+// RGB. Same code as ARGB, except for the load ordering.
+static void RGBAToYMatrixRow_LASX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr20, %4, 0 \n\t" // load shuff
+ "1: \n\t"
+ "xvld $xr4, %0, 0 \n\t"
+ "xvld $xr5, %0, 32 \n\t"
+ "xvld $xr6, %0, 64 \n\t"
+ "xvld $xr7, %0, 96 \n\t" // load 32 pixels of
+ // RGBA
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpickev.b $xr8, $xr5, $xr4 \n\t" // AG
+ "xvpickev.b $xr10, $xr7, $xr6 \n\t"
+ "xvpickod.b $xr9, $xr5, $xr4 \n\t" // BR
+ "xvpickod.b $xr11, $xr7, $xr6 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr9, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr11, $xr0 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr8, $xr1 \n\t" // G
+ "xvmaddwod.h.bu $xr13, $xr10, $xr1 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr9, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr11, $xr2 \n\t"
+ "addi.d %0, %0, 128 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvperm.w $xr11, $xr10, $xr20 \n\t"
+ "xvst $xr11, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), "r"(shuff)
+ : "memory");
+}
+
+void RGBAToYRow_LASX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LASX(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+
+void RGBAToYJRow_LASX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_LASX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void BGRAToYRow_LASX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LASX(src_bgra, dst_y, width, &kRawI601Constants);
+}
+
+static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int8_t shuff[128] = {
+ 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
+ 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
+ 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
+ 24, 26, 27, 29, 30, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15,
+ 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
+ 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0,
+ 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0,
+ 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
+ asm volatile(
+ "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
+ "xvldrepl.b $xr1, %3, 1 \n\t" // load rgbconstants
+ "xvldrepl.b $xr2, %3, 2 \n\t" // load rgbconstants
+ "xvldrepl.h $xr3, %3, 4 \n\t" // load rgbconstants
+ "xvld $xr4, %4, 0 \n\t" // load shuff
+ "xvld $xr5, %4, 32 \n\t"
+ "xvld $xr6, %4, 64 \n\t"
+ "xvld $xr7, %4, 96 \n\t"
+ "1: \n\t"
+ "xvld $xr8, %0, 0 \n\t"
+ "xvld $xr9, %0, 32 \n\t"
+ "xvld $xr10, %0, 64 \n\t" // load 32 pixels of
+ // RGB
+ "xvor.v $xr12, $xr3, $xr3 \n\t"
+ "xvor.v $xr13, $xr3, $xr3 \n\t"
+ "xvor.v $xr11, $xr9, $xr9 \n\t"
+ "addi.d %2, %2, -32 \n\t" // 32 processed per
+ // loop.
+ "xvpermi.q $xr9, $xr8, 0x30 \n\t" // src0
+ "xvpermi.q $xr8, $xr10, 0x03 \n\t" // src1
+ "xvpermi.q $xr10, $xr11, 0x30 \n\t" // src2
+ "xvshuf.b $xr14, $xr8, $xr9, $xr4 \n\t"
+ "xvshuf.b $xr15, $xr8, $xr10, $xr5 \n\t"
+ "xvshuf.b $xr16, $xr8, $xr9, $xr6 \n\t"
+ "xvshuf.b $xr17, $xr8, $xr10, $xr7 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr16, $xr1 \n\t" // G
+ "xvmaddwev.h.bu $xr13, $xr17, $xr1 \n\t"
+ "xvmaddwev.h.bu $xr12, $xr14, $xr0 \n\t" // B
+ "xvmaddwev.h.bu $xr13, $xr15, $xr0 \n\t"
+ "xvmaddwod.h.bu $xr12, $xr14, $xr2 \n\t" // R
+ "xvmaddwod.h.bu $xr13, $xr15, $xr2 \n\t"
+ "addi.d %0, %0, 96 \n\t"
+ "xvpickod.b $xr10, $xr13, $xr12 \n\t"
+ "xvst $xr10, %1, 0 \n\t"
+ "addi.d %1, %1, 32 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), // %3
+ "r"(shuff) // %4
+ : "memory");
+}
+
+void RGB24ToYJRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LASX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void RAWToYJRow_LASX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LASX(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+
+void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LASX(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+
+void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LASX(src_raw, dst_y, width, &kRawI601Constants);
}
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
diff --git a/files/source/row_lsx.cc b/files/source/row_lsx.cc
index 3e8b901a..e626072a 100644
--- a/files/source/row_lsx.cc
+++ b/files/source/row_lsx.cc
@@ -31,6 +31,91 @@ extern "C" {
yb = __lsx_vreplgr2vr_w(yuvconst->kYBiasToRgb[0]); \
}
+// Load data for 16 YUV422 pixels.
+#define READYUV422_D(psrc_y, psrc_u, psrc_v, out_y, uv_l, uv_h) \
+ { \
+ __m128i temp0, temp1; \
+ \
+ DUP2_ARG2(__lsx_vld, psrc_y, 0, psrc_u, 0, out_y, temp0); \
+ temp1 = __lsx_vld(psrc_v, 0); \
+ temp0 = __lsx_vsub_b(temp0, const_80); \
+ temp1 = __lsx_vsub_b(temp1, const_80); \
+ temp0 = __lsx_vsllwil_h_b(temp0, 0); \
+ temp1 = __lsx_vsllwil_h_b(temp1, 0); \
+ uv_l = __lsx_vilvl_h(temp0, temp1); \
+ uv_h = __lsx_vilvh_h(temp0, temp1); \
+ }
+
+// Load data for 8 YUV422 pixels.
+#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, uv) \
+ { \
+ __m128i temp0, temp1; \
+ \
+ out_y = __lsx_vld(psrc_y, 0); \
+ temp0 = __lsx_vldrepl_d(psrc_u, 0); \
+ temp1 = __lsx_vldrepl_d(psrc_v, 0); \
+ uv = __lsx_vilvl_b(temp0, temp1); \
+ uv = __lsx_vsub_b(uv, const_80); \
+ uv = __lsx_vsllwil_h_b(uv, 0); \
+ }
+
+// Convert 16 pixels of YUV420 to RGB.
+#define YUVTORGB_D(in_y, in_uvl, in_uvh, ubvr, ugvg, yg, yb, b_l, b_h, g_l, \
+ g_h, r_l, r_h) \
+ { \
+ __m128i u_l, u_h, v_l, v_h; \
+ __m128i yl_ev, yl_od, yh_ev, yh_od; \
+ __m128i temp0, temp1, temp2, temp3; \
+ \
+ temp0 = __lsx_vilvl_b(in_y, in_y); \
+ temp1 = __lsx_vilvh_b(in_y, in_y); \
+ yl_ev = __lsx_vmulwev_w_hu_h(temp0, yg); \
+ yl_od = __lsx_vmulwod_w_hu_h(temp0, yg); \
+ yh_ev = __lsx_vmulwev_w_hu_h(temp1, yg); \
+ yh_od = __lsx_vmulwod_w_hu_h(temp1, yg); \
+ DUP4_ARG2(__lsx_vsrai_w, yl_ev, 16, yl_od, 16, yh_ev, 16, yh_od, 16, \
+ yl_ev, yl_od, yh_ev, yh_od); \
+ yl_ev = __lsx_vadd_w(yl_ev, yb); \
+ yl_od = __lsx_vadd_w(yl_od, yb); \
+ yh_ev = __lsx_vadd_w(yh_ev, yb); \
+ yh_od = __lsx_vadd_w(yh_od, yb); \
+ v_l = __lsx_vmulwev_w_h(in_uvl, ubvr); \
+ u_l = __lsx_vmulwod_w_h(in_uvl, ubvr); \
+ v_h = __lsx_vmulwev_w_h(in_uvh, ubvr); \
+ u_h = __lsx_vmulwod_w_h(in_uvh, ubvr); \
+ temp0 = __lsx_vadd_w(yl_ev, u_l); \
+ temp1 = __lsx_vadd_w(yl_od, u_l); \
+ temp2 = __lsx_vadd_w(yh_ev, u_h); \
+ temp3 = __lsx_vadd_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ b_l = __lsx_vpackev_h(temp1, temp0); \
+ b_h = __lsx_vpackev_h(temp3, temp2); \
+ temp0 = __lsx_vadd_w(yl_ev, v_l); \
+ temp1 = __lsx_vadd_w(yl_od, v_l); \
+ temp2 = __lsx_vadd_w(yh_ev, v_h); \
+ temp3 = __lsx_vadd_w(yh_od, v_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ r_l = __lsx_vpackev_h(temp1, temp0); \
+ r_h = __lsx_vpackev_h(temp3, temp2); \
+ DUP2_ARG2(__lsx_vdp2_w_h, in_uvl, ugvg, in_uvh, ugvg, u_l, u_h); \
+ temp0 = __lsx_vsub_w(yl_ev, u_l); \
+ temp1 = __lsx_vsub_w(yl_od, u_l); \
+ temp2 = __lsx_vsub_w(yh_ev, u_h); \
+ temp3 = __lsx_vsub_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ g_l = __lsx_vpackev_h(temp1, temp0); \
+ g_h = __lsx_vpackev_h(temp3, temp2); \
+ }
+
// Convert 8 pixels of YUV420 to RGB.
#define YUVTORGB(in_y, in_vu, vrub, vgug, yg, yb, out_b, out_g, out_r) \
{ \
@@ -118,6 +203,25 @@ extern "C" {
out_g = __lsx_vpackev_h(tmp1, tmp0); \
}
+// Pack and Store 16 ARGB values.
+#define STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, pdst_argb) \
+ { \
+ __m128i temp0, temp1, temp2, temp3; \
+ temp0 = __lsx_vpackev_b(g_l, b_l); \
+ temp1 = __lsx_vpackev_b(a_l, r_l); \
+ temp2 = __lsx_vpackev_b(g_h, b_h); \
+ temp3 = __lsx_vpackev_b(a_h, r_h); \
+ r_l = __lsx_vilvl_h(temp1, temp0); \
+ r_h = __lsx_vilvh_h(temp1, temp0); \
+ g_l = __lsx_vilvl_h(temp3, temp2); \
+ g_h = __lsx_vilvh_h(temp3, temp2); \
+ __lsx_vst(r_l, pdst_argb, 0); \
+ __lsx_vst(r_h, pdst_argb, 16); \
+ __lsx_vst(g_l, pdst_argb, 32); \
+ __lsx_vst(g_h, pdst_argb, 48); \
+ pdst_argb += 64; \
+ }
+
// Pack and Store 8 ARGB values.
#define STOREARGB(in_a, in_r, in_g, in_b, pdst_argb) \
{ \
@@ -155,6 +259,1028 @@ extern "C" {
_dst0 = __lsx_vpickod_b(_reg1, _reg0); \
}
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ int len = width / 32;
+ __m128i src0, src1;
+ __m128i shuffler = {0x08090A0B0C0D0E0F, 0x0001020304050607};
+ src += width - 32;
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
+ DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0,
+ src1);
+ __lsx_vst(src1, dst, 0);
+ __lsx_vst(src0, dst, 16);
+ dst += 32;
+ src -= 32;
+ }
+}
+
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src, dst;
+ __m128i shuffler = {0x0004000500060007, 0x0000000100020003};
+
+ src_uv += (width - 8) << 1;
+ for (x = 0; x < len; x++) {
+ src = __lsx_vld(src_uv, 0);
+ dst = __lsx_vshuf_h(shuffler, src, src);
+ __lsx_vst(dst, dst_uv, 0);
+ src_uv -= 16;
+ dst_uv += 16;
+ }
+}
+
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1;
+ __m128i shuffler = {0x0B0A09080F0E0D0C, 0x0302010007060504};
+
+ src += (width * 4) - 32;
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
+ DUP2_ARG3(__lsx_vshuf_b, src0, src0, shuffler, src1, src1, shuffler, src0,
+ src1);
+ __lsx_vst(src1, dst, 0);
+ __lsx_vst(src0, dst, 16);
+ dst += 32;
+ src -= 32;
+ }
+}
+
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src_u0, src_v0, src_y0, vec_uv0;
+ __m128i vec_yuy2_0, vec_yuy2_1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0);
+ src_y0 = __lsx_vld(src_y, 0);
+ vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);
+ vec_yuy2_0 = __lsx_vilvl_b(vec_uv0, src_y0);
+ vec_yuy2_1 = __lsx_vilvh_b(vec_uv0, src_y0);
+ __lsx_vst(vec_yuy2_0, dst_yuy2, 0);
+ __lsx_vst(vec_yuy2_1, dst_yuy2, 16);
+ src_u += 8;
+ src_v += 8;
+ src_y += 16;
+ dst_yuy2 += 32;
+ }
+}
+
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src_u0, src_v0, src_y0, vec_uv0;
+ __m128i vec_uyvy0, vec_uyvy1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_u, 0, src_v, 0, src_u0, src_v0);
+ src_y0 = __lsx_vld(src_y, 0);
+ vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);
+ vec_uyvy0 = __lsx_vilvl_b(src_y0, vec_uv0);
+ vec_uyvy1 = __lsx_vilvh_b(src_y0, vec_uv0);
+ __lsx_vst(vec_uyvy0, dst_uyvy, 0);
+ __lsx_vst(vec_uyvy1, dst_uyvy, 16);
+ src_u += 8;
+ src_v += 8;
+ src_y += 16;
+ dst_uyvy += 32;
+ }
+}
+
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_ug, vec_vr, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i alpha = __lsx_vldi(0xFF);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(alpha, alpha, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
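The fixed-point arithmetic lives in the READYUV422_D/YUVTORGB_D macros, with coefficients taken from yuvconstants so the same kernel serves BT.601, BT.709 and full-range matrices. As a readable reference, a scalar sketch of one pixel using the classic BT.601 limited-range constants (illustrative values, not read from yuvconstants):

#include <stdint.h>

static uint8_t Clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// 10-bit fixed-point YUV -> RGB, BT.601 limited range.
static void YuvPixel_Sketch(uint8_t y, uint8_t u, uint8_t v,
                            uint8_t* b, uint8_t* g, uint8_t* r) {
  int y1 = (((int)y - 16) * 1192) >> 10;  // 1.164 * (Y - 16)
  int ud = (int)u - 128;
  int vd = (int)v - 128;
  *b = Clamp255(y1 + ((2066 * ud) >> 10));                      // + 2.018 U
  *g = Clamp255(y1 - ((400 * ud) >> 10) - ((833 * vd) >> 10));  // - 0.391 U - 0.813 V
  *r = Clamp255(y1 + ((1634 * vd) >> 10));                      // + 1.596 V
}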
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i alpha = __lsx_vldi(0xFF);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(r_l, r_h, g_l, g_h, b_l, b_h, alpha, alpha, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ int res = width & 15;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i zero = __lsx_vldi(0);
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h, a_l, a_h;
+
+ y = __lsx_vld(src_a, 0);
+ a_l = __lsx_vilvl_b(zero, y);
+ a_h = __lsx_vilvh_b(zero, y);
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ src_a += 16;
+ }
+ if (res) {
+ __m128i y, uv, r, g, b, a;
+ a = __lsx_vld(src_a, 0);
+ a = __lsx_vsllwil_hu_bu(a, 0);
+ READYUV422(src_y, src_u, src_v, y, uv);
+ YUVTORGB(y, uv, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b, g, r);
+ STOREARGB(a, r, g, b, dst_argb);
+ }
+}
+
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+                        const uint8_t* src_u,
+                        const uint8_t* src_v,
+                        uint8_t* dst_rgb24,
+                        const struct YuvConstants* yuvconstants,
+                        int32_t width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i shuffler0 = {0x0504120302100100, 0x0A18090816070614};
+ __m128i shuffler1 = {0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+ __m128i temp0, temp1, temp2, temp3;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ temp0 = __lsx_vpackev_b(g_l, b_l);
+ temp1 = __lsx_vpackev_b(g_h, b_h);
+ DUP4_ARG3(__lsx_vshuf_b, r_l, temp0, shuffler1, r_h, temp1, shuffler1, r_l,
+ temp0, shuffler0, r_h, temp1, shuffler0, temp2, temp3, temp0,
+ temp1);
+
+ b_l = __lsx_vilvl_d(temp1, temp2);
+ b_h = __lsx_vilvh_d(temp3, temp1);
+    __lsx_vst(temp0, dst_rgb24, 0);
+    __lsx_vst(b_l, dst_rgb24, 16);
+    __lsx_vst(b_h, dst_rgb24, 32);
+    dst_rgb24 += 48;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 3);
+ b_h = __lsx_vsrli_h(b_h, 3);
+ g_l = __lsx_vsrli_h(g_l, 2);
+ g_h = __lsx_vsrli_h(g_h, 2);
+ r_l = __lsx_vsrli_h(r_l, 3);
+ r_h = __lsx_vsrli_h(r_h, 3);
+ r_l = __lsx_vslli_h(r_l, 11);
+ r_h = __lsx_vslli_h(r_h, 11);
+ g_l = __lsx_vslli_h(g_l, 5);
+ g_h = __lsx_vslli_h(g_h, 5);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_rgb565, 0);
+ __lsx_vst(r_h, dst_rgb565, 16);
+ dst_rgb565 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
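Per pixel, the shift/or chain above reduces to the usual 5-6-5 packing:

#include <stdint.h>

// Pack 8-bit channels into RGB565: R in bits 15..11, G in 10..5, B in 4..0.
static uint16_t PackRgb565_Sketch(uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}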
+// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i alpha = {0xF000F000F000F000, 0xF000F000F000F000};
+ __m128i mask = {0x00F000F000F000F0, 0x00F000F000F000F0};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 4);
+ b_h = __lsx_vsrli_h(b_h, 4);
+ r_l = __lsx_vsrli_h(r_l, 4);
+ r_h = __lsx_vsrli_h(r_h, 4);
+ g_l = __lsx_vand_v(g_l, mask);
+ g_h = __lsx_vand_v(g_h, mask);
+ r_l = __lsx_vslli_h(r_l, 8);
+ r_h = __lsx_vslli_h(r_h, 8);
+ r_l = __lsx_vor_v(r_l, alpha);
+ r_h = __lsx_vor_v(r_h, alpha);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_argb4444, 0);
+ __lsx_vst(r_h, dst_argb4444, 16);
+ dst_argb4444 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i const_80 = __lsx_vldi(0x80);
+ __m128i alpha = {0x8000800080008000, 0x8000800080008000};
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h;
+
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ b_l = __lsx_vsrli_h(b_l, 3);
+ b_h = __lsx_vsrli_h(b_h, 3);
+    g_l = __lsx_vsrli_h(g_l, 3);
+    g_h = __lsx_vsrli_h(g_h, 3);
+ g_l = __lsx_vslli_h(g_l, 5);
+ g_h = __lsx_vslli_h(g_h, 5);
+ r_l = __lsx_vsrli_h(r_l, 3);
+ r_h = __lsx_vsrli_h(r_h, 3);
+ r_l = __lsx_vslli_h(r_l, 10);
+ r_h = __lsx_vslli_h(r_h, 10);
+ r_l = __lsx_vor_v(r_l, alpha);
+ r_h = __lsx_vor_v(r_h, alpha);
+ r_l = __lsx_vor_v(r_l, g_l);
+ r_h = __lsx_vor_v(r_h, g_h);
+ r_l = __lsx_vor_v(r_l, b_l);
+ r_h = __lsx_vor_v(r_h, b_h);
+ __lsx_vst(r_l, dst_argb1555, 0);
+ __lsx_vst(r_h, dst_argb1555, 16);
+ dst_argb1555 += 32;
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ }
+}
+
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1);
+ dst0 = __lsx_vpickev_b(src1, src0);
+ __lsx_vst(dst0, dst_y, 0);
+ src_yuy2 += 32;
+ dst_y += 16;
+ }
+}
+
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2;
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src_yuy2_next, 0,
+ src_yuy2_next, 16, src0, src1, src2, src3);
+ src0 = __lsx_vpickod_b(src1, src0);
+ src1 = __lsx_vpickod_b(src3, src2);
+ tmp0 = __lsx_vavgr_bu(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_yuy2 += 32;
+ src_yuy2_next += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
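The UV planes produced here are vertically subsampled, so the kernel averages the chroma of two source rows; vavgr_bu is a rounding average. Scalar sketch:

#include <stdint.h>

// U sits at byte 1 and V at byte 3 of each 4-byte YUY2 pixel pair.
static void Yuy2ToUV_Sketch(const uint8_t* row0, const uint8_t* row1,
                            uint8_t* dst_u, uint8_t* dst_v, int width) {
  for (int i = 0; i < width / 2; ++i) {
    dst_u[i] = (uint8_t)((row0[4 * i + 1] + row1[4 * i + 1] + 1) >> 1);
    dst_v[i] = (uint8_t)((row0[4 * i + 3] + row1[4 * i + 3] + 1) >> 1);
  }
}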
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_yuy2, 0, src_yuy2, 16, src0, src1);
+ tmp0 = __lsx_vpickod_b(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_yuy2 += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void UYVYToYRow_LSX(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1);
+ dst0 = __lsx_vpickod_b(src1, src0);
+ __lsx_vst(dst0, dst_y, 0);
+ src_uyvy += 32;
+ dst_y += 16;
+ }
+}
+
+void UYVYToUVRow_LSX(const uint8_t* src_uyvy,
+ int src_stride_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy;
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src_uyvy_next, 0,
+ src_uyvy_next, 16, src0, src1, src2, src3);
+ src0 = __lsx_vpickev_b(src1, src0);
+ src1 = __lsx_vpickev_b(src3, src2);
+ tmp0 = __lsx_vavgr_bu(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_uyvy += 32;
+ src_uyvy_next += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void UYVYToUV422Row_LSX(const uint8_t* src_uyvy,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, tmp0, dst0, dst1;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_uyvy, 0, src_uyvy, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ dst0 = __lsx_vpickev_b(tmp0, tmp0);
+ dst1 = __lsx_vpickod_b(tmp0, tmp0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst1, dst_v, 0, 0);
+ src_uyvy += 32;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
+void ARGBToUVRow_LSX(const uint8_t* src_argb0,
+ int src_stride_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ int x;
+ int len = width / 16;
+ const uint8_t* src_argb1 = src_argb0 + src_stride_argb;
+
+ __m128i src0, src1, src2, src3, src4, src5, src6, src7;
+ __m128i vec0, vec1, vec2, vec3;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, dst0, dst1;
+ __m128i const_0x70 = {0x0038003800380038, 0x0038003800380038};
+ __m128i const_0x4A = {0x0025002500250025, 0x0025002500250025};
+ __m128i const_0x26 = {0x0013001300130013, 0x0013001300130013};
+ __m128i const_0x5E = {0x002f002f002f002f, 0x002f002f002f002f};
+ __m128i const_0x12 = {0x0009000900090009, 0x0009000900090009};
+ __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb0, 0, src_argb0, 16, src_argb0, 32, src_argb0,
+ 48, src0, src1, src2, src3);
+ DUP4_ARG2(__lsx_vld, src_argb1, 0, src_argb1, 16, src_argb1, 32, src_argb1,
+ 48, src4, src5, src6, src7);
+ vec0 = __lsx_vaddwev_h_bu(src0, src4);
+ vec1 = __lsx_vaddwev_h_bu(src1, src5);
+ vec2 = __lsx_vaddwev_h_bu(src2, src6);
+ vec3 = __lsx_vaddwev_h_bu(src3, src7);
+ tmp0 = __lsx_vpickev_h(vec1, vec0);
+ tmp1 = __lsx_vpickev_h(vec3, vec2);
+ tmp2 = __lsx_vpickod_h(vec1, vec0);
+ tmp3 = __lsx_vpickod_h(vec3, vec2);
+ vec0 = __lsx_vaddwod_h_bu(src0, src4);
+ vec1 = __lsx_vaddwod_h_bu(src1, src5);
+ vec2 = __lsx_vaddwod_h_bu(src2, src6);
+ vec3 = __lsx_vaddwod_h_bu(src3, src7);
+ tmp4 = __lsx_vpickev_h(vec1, vec0);
+ tmp5 = __lsx_vpickev_h(vec3, vec2);
+ vec0 = __lsx_vpickev_h(tmp1, tmp0);
+ vec1 = __lsx_vpickod_h(tmp1, tmp0);
+ src0 = __lsx_vavgr_h(vec0, vec1);
+ vec0 = __lsx_vpickev_h(tmp3, tmp2);
+ vec1 = __lsx_vpickod_h(tmp3, tmp2);
+ src1 = __lsx_vavgr_h(vec0, vec1);
+ vec0 = __lsx_vpickev_h(tmp5, tmp4);
+ vec1 = __lsx_vpickod_h(tmp5, tmp4);
+ src2 = __lsx_vavgr_h(vec0, vec1);
+ dst0 = __lsx_vmadd_h(const_0x8080, src0, const_0x70);
+ dst0 = __lsx_vmsub_h(dst0, src2, const_0x4A);
+ dst0 = __lsx_vmsub_h(dst0, src1, const_0x26);
+ dst1 = __lsx_vmadd_h(const_0x8080, src1, const_0x70);
+ dst1 = __lsx_vmsub_h(dst1, src2, const_0x5E);
+ dst1 = __lsx_vmsub_h(dst1, src0, const_0x12);
+ dst0 = __lsx_vsrai_h(dst0, 8);
+ dst1 = __lsx_vsrai_h(dst1, 8);
+ dst0 = __lsx_vpickev_b(dst1, dst0);
+ __lsx_vstelm_d(dst0, dst_u, 0, 0);
+ __lsx_vstelm_d(dst0, dst_v, 0, 1);
+ src_argb0 += 64;
+ src_argb1 += 64;
+ dst_u += 8;
+ dst_v += 8;
+ }
+}
+
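The halved constants (0x38 = 56 = 112/2 and so on) fold in the factor of 2 left over from summing two rows before the rounding horizontal average. On a fully averaged 2x2 pixel the computation is the standard BT.601 chroma transform:

#include <stdint.h>

// BT.601 chroma from one averaged pixel; 0x8080 biases the result to 128
// and supplies rounding, and >>8 undoes the 256x coefficient scaling.
static void RgbToUV_Sketch(uint8_t r, uint8_t g, uint8_t b,
                           uint8_t* u, uint8_t* v) {
  *u = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
  *v = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}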
+void ARGBToRGB24Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = (width / 16) - 1;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i shuf = {0x0908060504020100, 0x000000000E0D0C0A};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ __lsx_vst(tmp3, dst_rgb, 36);
+ dst_rgb += 48;
+ src_argb += 64;
+ }
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ dst_rgb += 36;
+ __lsx_vst(tmp3, dst_rgb, 0);
+}
+
+void ARGBToRAWRow_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = (width / 16) - 1;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i shuf = {0x090A040506000102, 0x000000000C0D0E08};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ __lsx_vst(tmp3, dst_rgb, 36);
+ dst_rgb += 48;
+ src_argb += 64;
+ }
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vshuf_b(src0, src0, shuf);
+ tmp1 = __lsx_vshuf_b(src1, src1, shuf);
+ tmp2 = __lsx_vshuf_b(src2, src2, shuf);
+ tmp3 = __lsx_vshuf_b(src3, src3, shuf);
+ __lsx_vst(tmp0, dst_rgb, 0);
+ __lsx_vst(tmp1, dst_rgb, 12);
+ __lsx_vst(tmp2, dst_rgb, 24);
+ dst_rgb += 36;
+ __lsx_vst(tmp3, dst_rgb, 0);
+}
+
+void ARGBToRGB565Row_LSX(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, tmp0, tmp1, dst0;
+ __m128i shift = {0x0300030003000300, 0x0300030003000300};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp0 = __lsx_vsrli_b(tmp0, 3);
+ tmp1 = __lsx_vpackev_b(zero, tmp1);
+ tmp1 = __lsx_vsrli_h(tmp1, 2);
+ tmp0 = __lsx_vsll_b(tmp0, shift);
+ tmp1 = __lsx_vslli_h(tmp1, 5);
+ dst0 = __lsx_vor_v(tmp0, tmp1);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
+void ARGBToARGB1555Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, tmp0, tmp1, tmp2, tmp3, dst0;
+ __m128i shift1 = {0x0703070307030703, 0x0703070307030703};
+ __m128i shift2 = {0x0200020002000200, 0x0200020002000200};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp0 = __lsx_vsrli_b(tmp0, 3);
+ tmp1 = __lsx_vsrl_b(tmp1, shift1);
+ tmp0 = __lsx_vsll_b(tmp0, shift2);
+ tmp2 = __lsx_vpackev_b(zero, tmp1);
+ tmp3 = __lsx_vpackod_b(zero, tmp1);
+ tmp2 = __lsx_vslli_h(tmp2, 5);
+ tmp3 = __lsx_vslli_h(tmp3, 15);
+ dst0 = __lsx_vor_v(tmp0, tmp2);
+ dst0 = __lsx_vor_v(dst0, tmp3);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
+void ARGBToARGB4444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ tmp1 = __lsx_vandi_b(tmp1, 0xF0);
+ tmp0 = __lsx_vsrli_b(tmp0, 4);
+ dst0 = __lsx_vor_v(tmp1, tmp0);
+ __lsx_vst(dst0, dst_rgb, 0);
+ dst_rgb += 16;
+ src_argb += 32;
+ }
+}
+
+void ARGBToUV444Row_LSX(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int32_t width) {
+ int x;
+ int len = width / 16;
+ __m128i src0, src1, src2, src3;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+ __m128i reg0, reg1, reg2, reg3, dst0, dst1;
+ __m128i const_112 = __lsx_vldi(112);
+ __m128i const_74 = __lsx_vldi(74);
+ __m128i const_38 = __lsx_vldi(38);
+ __m128i const_94 = __lsx_vldi(94);
+ __m128i const_18 = __lsx_vldi(18);
+ __m128i const_0x8080 = {0x8080808080808080, 0x8080808080808080};
+ for (x = 0; x < len; x++) {
+ DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
+ src0, src1, src2, src3);
+ tmp0 = __lsx_vpickev_h(src1, src0);
+ tmp1 = __lsx_vpickod_h(src1, src0);
+ tmp2 = __lsx_vpickev_h(src3, src2);
+ tmp3 = __lsx_vpickod_h(src3, src2);
+ reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp0, const_112);
+ reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp2, const_112);
+ reg2 = __lsx_vmulwod_h_bu(tmp0, const_74);
+ reg3 = __lsx_vmulwod_h_bu(tmp2, const_74);
+ reg2 = __lsx_vmaddwev_h_bu(reg2, tmp1, const_38);
+ reg3 = __lsx_vmaddwev_h_bu(reg3, tmp3, const_38);
+ reg0 = __lsx_vsub_h(reg0, reg2);
+ reg1 = __lsx_vsub_h(reg1, reg3);
+ reg0 = __lsx_vsrai_h(reg0, 8);
+ reg1 = __lsx_vsrai_h(reg1, 8);
+ dst0 = __lsx_vpickev_b(reg1, reg0);
+
+ reg0 = __lsx_vmaddwev_h_bu(const_0x8080, tmp1, const_112);
+ reg1 = __lsx_vmaddwev_h_bu(const_0x8080, tmp3, const_112);
+ reg2 = __lsx_vmulwev_h_bu(tmp0, const_18);
+ reg3 = __lsx_vmulwev_h_bu(tmp2, const_18);
+ reg2 = __lsx_vmaddwod_h_bu(reg2, tmp0, const_94);
+ reg3 = __lsx_vmaddwod_h_bu(reg3, tmp2, const_94);
+ reg0 = __lsx_vsub_h(reg0, reg2);
+ reg1 = __lsx_vsub_h(reg1, reg3);
+ reg0 = __lsx_vsrai_h(reg0, 8);
+ reg1 = __lsx_vsrai_h(reg1, 8);
+ dst1 = __lsx_vpickev_b(reg1, reg0);
+
+ __lsx_vst(dst0, dst_u, 0);
+ __lsx_vst(dst1, dst_v, 0);
+ dst_u += 16;
+ dst_v += 16;
+ src_argb += 64;
+ }
+}
+
+void ARGBMultiplyRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i zero = __lsx_vldi(0);
+ __m128i src0, src1, dst0, dst1;
+ __m128i tmp0, tmp1, tmp2, tmp3;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ tmp0 = __lsx_vilvl_b(src0, src0);
+ tmp1 = __lsx_vilvh_b(src0, src0);
+ tmp2 = __lsx_vilvl_b(zero, src1);
+ tmp3 = __lsx_vilvh_b(zero, src1);
+ dst0 = __lsx_vmuh_hu(tmp0, tmp2);
+ dst1 = __lsx_vmuh_hu(tmp1, tmp3);
+ dst0 = __lsx_vpickev_b(dst1, dst0);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBAddRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ dst0 = __lsx_vsadd_bu(src0, src1);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBSubtractRow_LSX(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 4;
+ __m128i src0, src1, dst0;
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb0, 0, src_argb1, 0, src0, src1);
+ dst0 = __lsx_vssub_bu(src0, src1);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb0 += 16;
+ src_argb1 += 16;
+ dst_argb += 16;
+ }
+}
+
+void ARGBAttenuateRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, reg2, reg3, reg4, reg5;
+ __m128i b, g, r, a, dst0, dst1;
+ __m128i control = {0x0005000100040000, 0x0007000300060002};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ b = __lsx_vpackev_b(tmp0, tmp0);
+ r = __lsx_vpackod_b(tmp0, tmp0);
+ g = __lsx_vpackev_b(tmp1, tmp1);
+ a = __lsx_vpackod_b(tmp1, tmp1);
+ reg0 = __lsx_vmulwev_w_hu(b, a);
+ reg1 = __lsx_vmulwod_w_hu(b, a);
+ reg2 = __lsx_vmulwev_w_hu(r, a);
+ reg3 = __lsx_vmulwod_w_hu(r, a);
+ reg4 = __lsx_vmulwev_w_hu(g, a);
+ reg5 = __lsx_vmulwod_w_hu(g, a);
+ reg0 = __lsx_vssrani_h_w(reg1, reg0, 24);
+ reg2 = __lsx_vssrani_h_w(reg3, reg2, 24);
+ reg4 = __lsx_vssrani_h_w(reg5, reg4, 24);
+ reg0 = __lsx_vshuf_h(control, reg0, reg0);
+ reg2 = __lsx_vshuf_h(control, reg2, reg2);
+ reg4 = __lsx_vshuf_h(control, reg4, reg4);
+ tmp0 = __lsx_vpackev_b(reg4, reg0);
+ tmp1 = __lsx_vpackev_b(a, reg2);
+ dst0 = __lsx_vilvl_h(tmp1, tmp0);
+ dst1 = __lsx_vilvh_h(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ dst_argb += 32;
+ src_argb += 32;
+ }
+}
+
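Attenuation premultiplies each color channel by its alpha. Duplicating a byte into both halves of a 16-bit lane multiplies it by 257, so the 32-bit products narrowed from bit 24 land within a rounding step of c * a / 255. Per channel:

#include <stdint.h>

// (c*257) * (a*257) >> 24 ~= c * a / 255; 255 * 255 maps back to 255 exactly.
static uint8_t Attenuate_Sketch(uint8_t c, uint8_t a) {
  return (uint8_t)(((uint32_t)(c * 257) * (uint32_t)(a * 257)) >> 24);
}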
+void ARGBToRGB565DitherRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_rgb,
+ uint32_t dither4,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1, dst0;
+ __m128i b, g, r;
+ __m128i zero = __lsx_vldi(0);
+ __m128i vec_dither = __lsx_vldrepl_w(&dither4, 0);
+
+ vec_dither = __lsx_vilvl_b(zero, vec_dither);
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ b = __lsx_vpackev_b(zero, tmp0);
+ r = __lsx_vpackod_b(zero, tmp0);
+ g = __lsx_vpackev_b(zero, tmp1);
+ b = __lsx_vadd_h(b, vec_dither);
+ g = __lsx_vadd_h(g, vec_dither);
+ r = __lsx_vadd_h(r, vec_dither);
+ DUP2_ARG1(__lsx_vclip255_h, b, g, b, g);
+ r = __lsx_vclip255_h(r);
+ b = __lsx_vsrai_h(b, 3);
+ g = __lsx_vsrai_h(g, 2);
+ r = __lsx_vsrai_h(r, 3);
+ g = __lsx_vslli_h(g, 5);
+ r = __lsx_vslli_h(r, 11);
+ dst0 = __lsx_vor_v(b, g);
+ dst0 = __lsx_vor_v(dst0, r);
+ __lsx_vst(dst0, dst_rgb, 0);
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
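Each byte of dither4 is added to B, G and R of the pixel in the matching column, clamped to 255, and only then truncated to 5-6-5, which is what makes the dither ordered. Per pixel:

#include <stdint.h>

static uint8_t ClampAdd255(int v) { return (uint8_t)(v > 255 ? 255 : v); }

// 'dither' is byte (x & 3) of dither4 for pixel column x.
static uint16_t Rgb565Dither_Sketch(uint8_t r, uint8_t g, uint8_t b,
                                    uint8_t dither) {
  int rd = ClampAdd255(r + dither);
  int gd = ClampAdd255(g + dither);
  int bd = ClampAdd255(b + dither);
  return (uint16_t)(((rd >> 3) << 11) | ((gd >> 2) << 5) | (bd >> 3));
}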
+void ARGBShuffleRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ const uint8_t* shuffler,
+ int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, dst0, dst1;
+ __m128i shuf = {0x0404040400000000, 0x0C0C0C0C08080808};
+ __m128i temp = __lsx_vldrepl_w(shuffler, 0);
+
+ shuf = __lsx_vadd_b(shuf, temp);
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ dst0 = __lsx_vshuf_b(src0, src0, shuf);
+ dst1 = __lsx_vshuf_b(src1, src1, shuf);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBShadeRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ int x;
+ int len = width / 4;
+ __m128i src0, dst0, tmp0, tmp1;
+ __m128i vec_value = __lsx_vreplgr2vr_w(value);
+
+ vec_value = __lsx_vilvl_b(vec_value, vec_value);
+ for (x = 0; x < len; x++) {
+ src0 = __lsx_vld(src_argb, 0);
+ tmp0 = __lsx_vilvl_b(src0, src0);
+ tmp1 = __lsx_vilvh_b(src0, src0);
+ tmp0 = __lsx_vmuh_hu(tmp0, vec_value);
+ tmp1 = __lsx_vmuh_hu(tmp1, vec_value);
+ dst0 = __lsx_vpickod_b(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ src_argb += 16;
+ dst_argb += 16;
+ }
+}
+
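ARGBShade scales all four channels by the corresponding bytes of 'value'. After the vilvl_b self-interleaves, each 16-bit lane holds channel*257 and value_byte*257, so vmuh_hu followed by the odd-byte pick is the same >>24 reduction sketched for ARGBAttenuateRow above:

#include <stdint.h>

// Scale one channel by one byte of 'value'; a scale of 255 is an identity.
static uint8_t Shade_Sketch(uint8_t c, uint8_t scale) {
  return (uint8_t)(((uint32_t)(c * 257) * (uint32_t)(scale * 257)) >> 24);
}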
+void ARGBGrayRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, reg2, dst0, dst1;
+ __m128i const_128 = __lsx_vldi(0x480);
+ __m128i const_150 = __lsx_vldi(0x96);
+ __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ reg0 = __lsx_vdp2_h_bu(tmp0, const_br);
+ reg1 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
+ reg2 = __lsx_vadd_h(reg0, reg1);
+ tmp0 = __lsx_vpackod_b(reg2, reg2);
+ tmp1 = __lsx_vpackod_b(tmp1, reg2);
+ dst0 = __lsx_vilvl_h(tmp1, tmp0);
+ dst1 = __lsx_vilvh_h(tmp1, tmp0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ src_argb += 32;
+ dst_argb += 32;
+ }
+}
+
+void ARGBSepiaRow_LSX(uint8_t* dst_argb, int width) {
+ int x;
+ int len = width / 8;
+ __m128i src0, src1, tmp0, tmp1;
+ __m128i reg0, reg1, spb, spg, spr;
+ __m128i dst0, dst1;
+ __m128i spb_g = __lsx_vldi(68);
+ __m128i spg_g = __lsx_vldi(88);
+ __m128i spr_g = __lsx_vldi(98);
+ __m128i spb_br = {0x2311231123112311, 0x2311231123112311};
+ __m128i spg_br = {0x2D162D162D162D16, 0x2D162D162D162D16};
+ __m128i spr_br = {0x3218321832183218, 0x3218321832183218};
+ __m128i shuff = {0x1706150413021100, 0x1F0E1D0C1B0A1908};
+
+ for (x = 0; x < len; x++) {
+ DUP2_ARG2(__lsx_vld, dst_argb, 0, dst_argb, 16, src0, src1);
+ tmp0 = __lsx_vpickev_b(src1, src0);
+ tmp1 = __lsx_vpickod_b(src1, src0);
+ DUP2_ARG2(__lsx_vdp2_h_bu, tmp0, spb_br, tmp0, spg_br, spb, spg);
+ spr = __lsx_vdp2_h_bu(tmp0, spr_br);
+ spb = __lsx_vmaddwev_h_bu(spb, tmp1, spb_g);
+ spg = __lsx_vmaddwev_h_bu(spg, tmp1, spg_g);
+ spr = __lsx_vmaddwev_h_bu(spr, tmp1, spr_g);
+ spb = __lsx_vsrli_h(spb, 7);
+ spg = __lsx_vsrli_h(spg, 7);
+ spr = __lsx_vsrli_h(spr, 7);
+ spg = __lsx_vsat_hu(spg, 7);
+ spr = __lsx_vsat_hu(spr, 7);
+ reg0 = __lsx_vpackev_b(spg, spb);
+ reg1 = __lsx_vshuf_b(tmp1, spr, shuff);
+ dst0 = __lsx_vilvl_h(reg1, reg0);
+ dst1 = __lsx_vilvh_h(reg1, reg0);
+ __lsx_vst(dst0, dst_argb, 0);
+ __lsx_vst(dst1, dst_argb, 16);
+ dst_argb += 32;
+ }
+}
+
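The sepia weights baked into the constants above are {17, 68, 35} for B, {22, 88, 45} for G and {24, 98, 50} for R, each applied to (B, G, R) and scaled down by >>7. B cannot overflow (its weights sum to 120), so only G and R are saturated, matching the two vsat_hu calls. A scalar sketch:

#include <stdint.h>

static uint8_t SatU8(int v) { return (uint8_t)(v > 255 ? 255 : v); }

// In-place sepia for one pixel; alpha is left untouched.
static void Sepia_Sketch(uint8_t* b, uint8_t* g, uint8_t* r) {
  int sb = (17 * *b + 68 * *g + 35 * *r) >> 7;
  int sg = (22 * *b + 88 * *g + 45 * *r) >> 7;
  int sr = (24 * *b + 98 * *g + 50 * *r) >> 7;
  *b = (uint8_t)sb;
  *g = SatU8(sg);
  *r = SatU8(sr);
}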
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width) {
@@ -561,39 +1687,6 @@ void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
}
}
-void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1, dst0;
- __m128i const_129 = __lsx_vldi(129);
- __m128i const_br = {0x4219421942194219, 0x4219421942194219};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
- __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
- __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
- __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- src0 = __lsx_vld(src_rgb24, 0);
- src1 = __lsx_vld(src_rgb24, 16);
- src2 = __lsx_vld(src_rgb24, 32);
- tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
- tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
- tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
- tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lsx_vpickod_b(reg1, reg0);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_rgb24 += 48;
- }
-}
-
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -647,39 +1740,6 @@ void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
}
}
-void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1, dst0;
- __m128i const_129 = __lsx_vldi(129);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
- __m128i shuff0 = {0x0B09080605030200, 0x17151412110F0E0C};
- __m128i shuff1 = {0x0301001E1D1B1A18, 0x0F0D0C0A09070604};
- __m128i shuff2 = {0x000A000700040001, 0x001600130010000D};
- __m128i shuff3 = {0x0002001F001C0019, 0x000E000B00080005};
-
- for (x = 0; x < len; x++) {
- src0 = __lsx_vld(src_raw, 0);
- src1 = __lsx_vld(src_raw, 16);
- src2 = __lsx_vld(src_raw, 32);
- tmp0 = __lsx_vshuf_b(src1, src0, shuff0);
- tmp1 = __lsx_vshuf_b(src1, src2, shuff1);
- tmp2 = __lsx_vshuf_b(src1, src0, shuff2);
- tmp3 = __lsx_vshuf_b(src1, src2, shuff3);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp2, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_raw += 48;
- }
-}
-
void RAWToUVRow_LSX(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_u,
@@ -914,62 +1974,6 @@ void SobelXYRow_LSX(const uint8_t* src_sobelx,
}
}
-void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_128 = __lsx_vldi(0x480);
- __m128i const_150 = __lsx_vldi(0x96);
- __m128i const_br = {0x4D1D4D1D4D1D4D1D, 0x4D1D4D1D4D1D4D1D};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_argb, 0, src_argb, 16, src_argb, 32, src_argb, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickev_b(src1, src0);
- tmp1 = __lsx_vpickod_b(src1, src0);
- tmp2 = __lsx_vpickev_b(src3, src2);
- tmp3 = __lsx_vpickod_b(src3, src2);
- reg0 = __lsx_vmaddwev_h_bu(const_128, tmp1, const_150);
- reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vpickod_b(reg1, reg0);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_argb += 64;
- }
-}
-
-void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_bgra, 0, src_bgra, 16, src_bgra, 32, src_bgra, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickod_b(src1, src0);
- tmp1 = __lsx_vpickev_b(src1, src0);
- tmp2 = __lsx_vpickod_b(src3, src2);
- tmp3 = __lsx_vpickev_b(src3, src2);
- reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_bgra += 64;
- }
-}
-
void BGRAToUVRow_LSX(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@@ -1018,34 +2022,6 @@ void BGRAToUVRow_LSX(const uint8_t* src_bgra,
}
}
-void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x1942194219421942, 0x1942194219421942};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_abgr, 0, src_abgr, 16, src_abgr, 32, src_abgr, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickev_b(src1, src0);
- tmp1 = __lsx_vpickod_b(src1, src0);
- tmp2 = __lsx_vpickev_b(src3, src2);
- tmp3 = __lsx_vpickod_b(src3, src2);
- reg0 = __lsx_vmaddwev_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_abgr += 64;
- }
-}
-
void ABGRToUVRow_LSX(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
@@ -1094,34 +2070,6 @@ void ABGRToUVRow_LSX(const uint8_t* src_abgr,
}
}
-void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
- int x;
- int len = width / 16;
- __m128i src0, src1, src2, src3, dst0;
- __m128i tmp0, tmp1, tmp2, tmp3;
- __m128i reg0, reg1;
- __m128i const_129 = __lsx_vldi(0x81);
- __m128i const_br = {0x4219421942194219, 0x4219421942194219};
- __m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
-
- for (x = 0; x < len; x++) {
- DUP4_ARG2(__lsx_vld, src_rgba, 0, src_rgba, 16, src_rgba, 32, src_rgba, 48,
- src0, src1, src2, src3);
- tmp0 = __lsx_vpickod_b(src1, src0);
- tmp1 = __lsx_vpickev_b(src1, src0);
- tmp2 = __lsx_vpickod_b(src3, src2);
- tmp3 = __lsx_vpickev_b(src3, src2);
- reg0 = __lsx_vmaddwod_h_bu(const_1080, tmp1, const_129);
- reg1 = __lsx_vmaddwod_h_bu(const_1080, tmp3, const_129);
- reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
- reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
- dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
- __lsx_vst(dst0, dst_y, 0);
- dst_y += 16;
- src_rgba += 64;
- }
-}
-
void RGBAToUVRow_LSX(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
@@ -1821,6 +2769,216 @@ void HalfFloatRow_LSX(const uint16_t* src,
}
}
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.0977 coefficient = 25
+// G * 0.5039 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
+
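Each constants block plugs into the same weighted sum; with the BT.601 limited-range values it is, per pixel:

#include <stdint.h>

// Y = (25 B + 129 G + 66 R + 0x1080) >> 8; the vector code gets the >>8
// for free by keeping only the odd (high) bytes of the 16-bit sums.
static uint8_t RgbToY_Sketch(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
}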
+// ARGB expects the first 3 bytes of each pixel to hold B, G, R; the 4th (alpha) is ignored.
+static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "1: \n\t"
+ "vld $vr4, %0, 0 \n\t"
+ "vld $vr5, %0, 16 \n\t"
+ "vld $vr6, %0, 32 \n\t"
+ "vld $vr7, %0, 48 \n\t" // load 16 pixels of
+ // ARGB
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vpickev.b $vr8, $vr5, $vr4 \n\t" // BR
+ "vpickev.b $vr10, $vr7, $vr6 \n\t"
+ "vpickod.b $vr9, $vr5, $vr4 \n\t" // GA
+ "vpickod.b $vr11, $vr7, $vr6 \n\t"
+ "vmaddwev.h.bu $vr12, $vr8, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr10, $vr0 \n\t"
+ "vmaddwev.h.bu $vr12, $vr9, $vr1 \n\t" // G
+ "vmaddwev.h.bu $vr13, $vr11, $vr1 \n\t"
+ "vmaddwod.h.bu $vr12, $vr8, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr10, $vr2 \n\t"
+ "addi.d %0, %0, 64 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_argb), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants)
+ : "memory");
+}
+
+void ARGBToYRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LSX(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+
+void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LSX(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_LSX(src_abgr, dst_y, width, &kRawI601Constants);
+}
+
+void ABGRToYJRow_LSX(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_LSX(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
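Note that the assembly loop retires 16 pixels per iteration and exits only when the width counter reaches zero, so these entry points require width to be a positive multiple of 16; other widths are expected to go through libyuv's _Any wrappers. A hypothetical call for an aligned row (assuming libyuv/row.h declares the LSX kernels in this build):

#include <stdint.h>
#include "libyuv/row.h"

// Convert one 640-pixel ARGB row to BT.601 luma; 640 is a multiple of 16.
void ConvertRowExample(const uint8_t* argb_row, uint8_t* y_row) {
  ARGBToYRow_LSX(argb_row, y_row, 640);
}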
+// RGBA expects the first byte of each pixel to be A (ignored), then 3 bytes of color.
+// Same code as ARGB, except the even/odd lanes of the loads swap roles.
+static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "1: \n\t"
+ "vld $vr4, %0, 0 \n\t"
+ "vld $vr5, %0, 16 \n\t"
+ "vld $vr6, %0, 32 \n\t"
+ "vld $vr7, %0, 48 \n\t" // load 16 pixels of
+ // RGBA
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vpickev.b $vr8, $vr5, $vr4 \n\t" // AG
+ "vpickev.b $vr10, $vr7, $vr6 \n\t"
+ "vpickod.b $vr9, $vr5, $vr4 \n\t" // BR
+ "vpickod.b $vr11, $vr7, $vr6 \n\t"
+ "vmaddwev.h.bu $vr12, $vr9, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr11, $vr0 \n\t"
+ "vmaddwod.h.bu $vr12, $vr8, $vr1 \n\t" // G
+ "vmaddwod.h.bu $vr13, $vr10, $vr1 \n\t"
+ "vmaddwod.h.bu $vr12, $vr9, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr11, $vr2 \n\t"
+ "addi.d %0, %0, 64 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants)
+ : "memory");
+}
+
+void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LSX(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+
+void RGBAToYJRow_LSX(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_LSX(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_LSX(src_bgra, dst_y, width, &kRawI601Constants);
+}
+
+static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ int8_t shuff[64] = {0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18,
+ 20, 21, 23, 24, 26, 27, 29, 30, 0, 1, 3, 4, 6,
+ 7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
+ 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
+ 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
+ asm volatile(
+ "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
+ "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
+ "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
+ "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
+ "vld $vr4, %4, 0 \n\t" // load shuff
+ "vld $vr5, %4, 16 \n\t"
+ "vld $vr6, %4, 32 \n\t"
+ "vld $vr7, %4, 48 \n\t"
+ "1: \n\t"
+ "vld $vr8, %0, 0 \n\t"
+ "vld $vr9, %0, 16 \n\t"
+ "vld $vr10, %0, 32 \n\t" // load 16 pixels of
+ // RGB
+ "vor.v $vr12, $vr3, $vr3 \n\t"
+ "vor.v $vr13, $vr3, $vr3 \n\t"
+ "addi.d %2, %2, -16 \n\t" // 16 processed per
+ // loop.
+ "vshuf.b $vr14, $vr9, $vr8, $vr4 \n\t"
+ "vshuf.b $vr15, $vr9, $vr10, $vr5 \n\t"
+ "vshuf.b $vr16, $vr9, $vr8, $vr6 \n\t"
+ "vshuf.b $vr17, $vr9, $vr10, $vr7 \n\t"
+ "vmaddwev.h.bu $vr12, $vr16, $vr1 \n\t" // G
+ "vmaddwev.h.bu $vr13, $vr17, $vr1 \n\t"
+ "vmaddwev.h.bu $vr12, $vr14, $vr0 \n\t" // B
+ "vmaddwev.h.bu $vr13, $vr15, $vr0 \n\t"
+ "vmaddwod.h.bu $vr12, $vr14, $vr2 \n\t" // R
+ "vmaddwod.h.bu $vr13, $vr15, $vr2 \n\t"
+ "addi.d %0, %0, 48 \n\t"
+ "vpickod.b $vr10, $vr13, $vr12 \n\t"
+ "vst $vr10, %1, 0 \n\t"
+ "addi.d %1, %1, 16 \n\t"
+ "bnez %2, 1b \n\t"
+ : "+&r"(src_rgba), // %0
+ "+&r"(dst_y), // %1
+ "+&r"(width) // %2
+ : "r"(rgbconstants), // %3
+ "r"(shuff) // %4
+ : "memory");
+}
+
+void RGB24ToYJRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LSX(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void RAWToYJRow_LSX(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_LSX(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+
+void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LSX(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+
+void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_LSX(src_raw, dst_y, width, &kRawI601Constants);
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_mmi.cc b/files/source/row_mmi.cc
deleted file mode 100644
index 362fd1cf..00000000
--- a/files/source/row_mmi.cc
+++ /dev/null
@@ -1,7842 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "libyuv/row.h"
-
-#include <string.h> // For memcpy and memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// clang-format off
-
-void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask = 0xff000000ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask] \n\t"
- "or %[src1], %[src1], %[mask] \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [mask] "f"(mask)
- : "memory");
-}
-
-void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
- uint64_t src0, src1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0xff000000ULL;
- const uint64_t mask2 = 0xc6;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
-
- "or %[src0], %[src0], %[mask1] \n\t"
- "punpcklbh %[src0], %[src0], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask2] \n\t"
- "or %[src1], %[src1], %[mask1] \n\t"
- "punpcklbh %[src1], %[src1], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask2] \n\t"
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
- : "memory");
-}
-
-void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
- uint64_t mask0 = 0xc6;
- uint64_t mask1 = 0x6c;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
- "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
- "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
-
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
- "punpcklbh %[src1], %[src1], %[zero] \n\t"
- "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
- "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
- "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
- "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "pextrh %[ftmp2], %[src1], %[zero] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "packushb %[src1], %[src1], %[zero] \n\t"
-
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
-
- "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
- "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
- : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
- [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
- : "memory");
-}
-
-void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[5];
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[c1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
- : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
- [four] "f"(0x04)
- : "memory");
-}
-
-void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[6];
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- uint64_t c4 = 0x0001000100010001;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psrlh %[a], %[src1], %[seven] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "xor %[a], %[a], %[c1] \n\t"
- "paddb %[a], %[a], %[c4] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[a] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),
- [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
- : "memory");
-}
-
-void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
- uint8_t* dst_argb,
- int width) {
- uint64_t ftmp[6];
- uint64_t c0 = 0x000f000f000f000f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psrlh %[a], %[src1], %[four] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "psllh %[src0], %[a], %[four] \n\t"
- "or %[a], %[src0], %[a] \n\t"
- "packushb %[b], %[b], %[r] \n\t"
- "packushb %[g], %[g], %[a] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
- "punpckhhw %[r], %[src0], %[src1] \n\t"
- "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
- "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"
- "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
- [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
- : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),
- [four] "f"(0x04)
- : "memory");
-}
-
-void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
-
- "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
- "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"
- "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)
- : "memory");
-}
-
-void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
- uint64_t mask0 = 0xc6;
- uint64_t mask1 = 0x18;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
- "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
- "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
- "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
-
- "pextrh %[src0], %[ftmp1], %[two] \n\t"
- "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"
- "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"
-
- "pextrh %[src0], %[ftmp2], %[two] \n\t"
- "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"
- "pextrh %[src0], %[ftmp2], %[one] \n\t"
- "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"
- "pextrh %[src0], %[ftmp2], %[zero] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "pinsrh_0 %[src1], %[src1], %[src0] \n\t"
- "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
- "packushb %[src1], %[src1], %[zero] \n\t"
-
- "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
- [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
- [one] "f"(0x01), [two] "f"(0x02)
- : "memory");
-}
-
-void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[two] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[eleven] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),
- [eleven] "f"(0x0b)
- : "memory");
-}
-
- // dither4 is a row of 4 values from a 4x4 dither matrix.
- // The 4x4 matrix contains values to add to RGB. When converting to
- // fewer bits (565) this provides an ordered dither.
- // The first byte of the matrix corresponds to the upper-left pixel.
- // The 4 values are passed as an int, then referenced as an array, so
- // endianness does not affect the order of the original matrix. But dither4
- // will contain the first pixel in the lower byte on little endian
- // or in the upper byte on big endian.
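- // Example: on little endian, dither4 = 0x03020100 adds +0, +1, +2 and +3
- // to pixels 0..3 before each channel is clamped to 255 and packed.
- // Scalar sketch per pixel (clamp255 is a hypothetical helper):
- //   uint8_t d = ((const uint8_t*)&dither4)[x & 3];
- //   *dst16 = (clamp255(b + d) >> 3) | ((clamp255(g + d) >> 2) << 5) |
- //            ((clamp255(r + d) >> 3) << 11);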
-void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- const uint32_t dither4,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[3];
- uint64_t c0 = 0x00ff00ff00ff00ff;
-
- __asm__ volatile(
- "punpcklbh %[dither], %[dither], %[zero] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
-
- "paddh %[b], %[b], %[dither] \n\t"
- "paddh %[g], %[g], %[dither] \n\t"
- "paddh %[r], %[r], %[dither] \n\t"
- "pcmpgth %[src0], %[b], %[c0] \n\t"
- "or %[src0], %[src0], %[b] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "pcmpgth %[src0], %[g], %[c0] \n\t"
- "or %[src0], %[src0], %[g] \n\t"
- "and %[g], %[src0], %[c0] \n\t"
- "pcmpgth %[src0], %[r], %[c0] \n\t"
- "or %[src0], %[src0], %[r] \n\t"
- "and %[r], %[src0], %[c0] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[two] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[eleven] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
- [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
- : "memory");
-}
-
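- // Pack 4 ARGB pixels per iteration to ARGB1555:
- // argb1555 = (b >> 3) | ((g >> 3) << 5) | ((r >> 3) << 10) | ((a >> 7) << 15).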
-void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
- "punpckhbh %[a], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[three] \n\t"
- "psrlh %[g], %[g], %[three] \n\t"
- "psrlh %[r], %[r], %[three] \n\t"
- "psrlh %[a], %[a], %[seven] \n\t"
-
- "psllh %[g], %[g], %[five] \n\t"
- "psllh %[r], %[r], %[ten] \n\t"
- "psllh %[a], %[a], %[fifteen] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),
- [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
- : "memory");
-}
-
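- // Pack 4 ARGB pixels per iteration to ARGB4444:
- // argb4444 = (b >> 4) | ((g >> 4) << 4) | ((r >> 4) << 8) | ((a >> 4) << 12).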
-void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_rgb,
- int width) {
- uint64_t src0, src1;
- uint64_t ftmp[4];
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
-
- "punpcklbh %[b], %[src0], %[src1] \n\t"
- "punpckhbh %[g], %[src0], %[src1] \n\t"
- "punpcklbh %[src0], %[b], %[g] \n\t"
- "punpckhbh %[src1], %[b], %[g] \n\t"
- "punpcklbh %[b], %[src0], %[zero] \n\t"
- "punpckhbh %[g], %[src0], %[zero] \n\t"
- "punpcklbh %[r], %[src1], %[zero] \n\t"
- "punpckhbh %[a], %[src1], %[zero] \n\t"
-
- "psrlh %[b], %[b], %[four] \n\t"
- "psrlh %[g], %[g], %[four] \n\t"
- "psrlh %[r], %[r], %[four] \n\t"
- "psrlh %[a], %[a], %[four] \n\t"
-
- "psllh %[g], %[g], %[four] \n\t"
- "psllh %[r], %[r], %[eight] \n\t"
- "psllh %[a], %[a], %[twelve] \n\t"
- "or %[b], %[b], %[g] \n\t"
- "or %[b], %[b], %[r] \n\t"
- "or %[b], %[b], %[a] \n\t"
-
- "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
- "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
- "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
- "daddiu %[width], %[width], -0x04 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
- [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
- : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
- [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),
- [twelve] "f"(0x0c)
- : "memory");
-}
-
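- // Convert 8 ARGB pixels per iteration to luma (BT.601 studio range):
- // y = (25 * b + 129 * g + 66 * r + 0x1080) >> 8.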
-void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001004200810019;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
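- // Convert 16 ARGB pixels from two adjacent rows per iteration to 8 U and
- // 8 V samples: each 2x2 block is summed and halved, then combined with the
- // half-scaled BT.601 weights in mask_u/mask_v and the 0x4040 bias.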
-void ARGBToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
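- // BGRA (A, R, G, B in memory) variant of ARGBToYRow_MMI; the mask and
- // the pinsrh_0 bias insertion are adjusted for the channel order.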
-void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0019008100420001;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
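- // BGRA (A, R, G, B in memory) variant of ARGBToUVRow_MMI.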
-void BGRAToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
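- // ABGR (R, G, B, A in memory) variant of ARGBToYRow_MMI.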
-void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
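- // ABGR (R, G, B, A in memory) variant of ARGBToUVRow_MMI.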
-void ABGRToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
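- // RGBA (A, B, G, R in memory) variant of ARGBToYRow_MMI.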
-void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0042008100190001;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
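- // RGBA (A, B, G, R in memory) variant of ARGBToUVRow_MMI.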
-void RGBAToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
- "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
- "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
- "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
- "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
- "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
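-// A scalar sketch (hypothetical helper, not part of the original kernel set)
-// of RGB24ToYRow_MMI below. The coefficients 25/129/66 are read from the
-// halfwords of 'mask', and the 0x1080 bias is (16 << 8) + 128, i.e. BT.601
-// limited-range luma with rounding; the MMI loop produces the same values
-// for 8 pixels per iteration.
-static void RGB24ToYRow_Sketch(const uint8_t* src, uint8_t* dst_y, int width) {
-  int i;
-  for (i = 0; i < width; ++i, src += 3) {  // RGB24 stores B, G, R per pixel.
-    dst_y[i] =
-        (uint8_t)((25 * src[0] + 129 * src[1] + 66 * src[2] + 0x1080) >> 8);
-  }
-}
-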
-void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001004200810019;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
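-// A scalar sketch (hypothetical helper) of RGB24ToUVRow_MMI below, which
-// consumes 16 pixels from two rows per iteration. Each 2x2 block is summed,
-// then rounded and halved (the psrlh-by-one with the 0x0001 lanes held in
-// ftmp12), and the halved BT.601 chroma weights 56/37/19 (U) and 56/47/9 (V)
-// read from mask_u/mask_v are applied; the 0x4040 bias is doubled to 0x8080
-// by its lane weight of 2.
-static void RGB24ToUVRow_Sketch(const uint8_t* src, int stride,
-                                uint8_t* dst_u, uint8_t* dst_v, int width) {
-  int i;
-  for (i = 0; i < width; i += 2, src += 6, ++dst_u, ++dst_v) {
-    // Rounded half-sums of each 2x2 block (RGB24 bytes are B, G, R).
-    int sb = (src[0] + src[3] + src[stride + 0] + src[stride + 3] + 1) >> 1;
-    int sg = (src[1] + src[4] + src[stride + 1] + src[stride + 4] + 1) >> 1;
-    int sr = (src[2] + src[5] + src[stride + 2] + src[stride + 5] + 1) >> 1;
-    int u = (56 * sb - 37 * sg - 19 * sr + 0x8080) >> 8;
-    int v = (56 * sr - 47 * sg - 9 * sb + 0x8080) >> 8;
-    *dst_u = (uint8_t)(u < 0 ? 0 : u > 255 ? 255 : u);  // packushb saturates
-    *dst_v = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
-  }
-}
-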
-void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0013002500380002;
- const uint64_t mask_v = 0x00020038002f0009;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
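-// Scalar sketch (hypothetical helper) of RAWToYRow_MMI below: identical to
-// RGB24ToYRow_Sketch above except that RAW stores R, G, B per pixel, so the
-// 'mask' halfwords 66/129/25 carry the R and B weights in swapped positions.
-static void RAWToYRow_Sketch(const uint8_t* src, uint8_t* dst_y, int width) {
-  int i;
-  for (i = 0; i < width; ++i, src += 3) {  // RAW stores R, G, B per pixel.
-    dst_y[i] =
-        (uint8_t)((66 * src[0] + 129 * src[1] + 25 * src[2] + 0x1080) >> 8);
-  }
-}
-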
-void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest0, dest1, dest2, dest3;
- const uint64_t value = 0x1080;
- const uint64_t mask = 0x0001001900810042;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[dest0], %[src] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[dest1], %[src] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[dest2], %[src] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
- "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "dsll %[src], %[src], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src], %[zero] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[dest3], %[src] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3)
- : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
- [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
- [zero] "f"(0x00)
- : "memory");
-}
-
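-// RAWToUVRow_MMI below mirrors RGB24ToUVRow_MMI: because RAW pixels are
-// R, G, B, the mask_u/mask_v lane layouts are reversed and the pinsrh_0/dsll
-// and pinsrh_3 roles of the U and V vectors are swapped. Reading the
-// constants, this yields the same u = (56*b - 37*g - 19*r + 0x8080) >> 8 and
-// v = (56*r - 47*g - 9*b + 0x8080) >> 8 on the rounded half-sums of each
-// 2x2 block.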
-void RAWToUVRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[13];
- uint64_t tmp[1];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0002003800250013;
- const uint64_t mask_v = 0x0009002f00380002;
-
- __asm__ volatile(
- "dli %[tmp0], 0x0001000100010001 \n\t"
- "dmtc1 %[tmp0], %[ftmp12] \n\t"
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
- "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src1], %[src0] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src0], %[src1] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
- "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src1], %[src0] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src0], %[src1] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
- "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src1], %[src0] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src0], %[src1] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
- "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "dsll %[src0], %[src0], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src_hi] \n\t"
- "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_lo] \n\t"
- "dsll %[src1], %[src1], %[eight] \n\t"
- "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src0], %[src_hi] \n\t"
- "paddh %[src0], %[src0], %[ftmp12] \n\t"
- "psrlh %[src0], %[src0], %[one] \n\t"
- "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
- "dsll %[src_hi], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src1], %[src0] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src0], %[src1] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
- [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
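-// Scalar sketch (hypothetical helper) of the full-range (JPEG) luma kernel
-// below: mask1 encodes the weights 29/150/77 and 'value' 0x80 is only the
-// rounding term, with no +16 offset; the MMI loop handles 8 ARGB pixels per
-// iteration.
-static void ARGBToYJRow_Sketch(const uint8_t* src, uint8_t* dst_y, int width) {
-  int i;
-  for (i = 0; i < width; ++i, src += 4) {  // ARGB stores B, G, R, A per pixel.
-    dst_y[i] =
-        (uint8_t)((29 * src[0] + 150 * src[1] + 77 * src[2] + 0x80) >> 8);
-  }
-}
-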
-void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest0, dest1, dest2, dest3;
- uint64_t tmp0, tmp1;
- const uint64_t shift = 0x08;
- const uint64_t value = 0x80;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x0001004D0096001DULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest2], %[dest2], %[shift] \n\t"
-
- "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
- "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest3], %[dest3], %[shift] \n\t"
-
- "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
- "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
- "packushb %[dest], %[tmp0], %[tmp1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
- [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
- [width] "r"(width)
- : "memory");
-}
-
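-// ARGBToUVJRow_MMI below follows the same two-row, 16-pixel pattern as the
-// UV kernels above, but uses pavgh (which rounds) in place of the explicit
-// +1/shift, and applies the full-range (JPEG) chroma weights halved to
-// 63/42/21 (U) and 63/53/10 (V) in mask_u/mask_v; these approximate the
-// C reference coefficients 127/84/43 and 127/107/20.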
-void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
- int src_stride_rgb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src_rgb1;
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0015002a003f0002;
- const uint64_t mask_v = 0x0002003f0035000a;
-
- __asm__ volatile(
- "1: \n\t"
- "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
-
- "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
- "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
- "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
- "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "punpcklbh %[src0], %[src1], %[zero] \n\t"
- "punpckhbh %[src1], %[src1], %[zero] \n\t"
- "paddh %[src0], %[src_lo], %[src0] \n\t"
- "paddh %[src1], %[src_hi], %[src1] \n\t"
- "pavgh %[src0], %[src0], %[src1] \n\t"
- "dsll %[src_lo], %[src0], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
- [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
- [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
- [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
- [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
- : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
- [zero] "f"(0x00), [eight] "f"(0x08),
- [sixteen] "f"(0x10)
- : "memory");
-}
-
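-// Scalar sketch (hypothetical helper) of the RGB565 luma kernel below. Each
-// 16-bit pixel is split into 5/6/5-bit fields, expanded to 8 bits with
-// (x << 3) | (x >> 2) for the 5-bit fields and (x << 2) | (x >> 4) for the
-// 6-bit green field, then fed through the same 25/129/66 weights; 'value'
-// replicates the 0x1080 bias so punpcklhw can pair it with every green lane.
-static void RGB565ToYRow_Sketch(const uint8_t* src, uint8_t* dst_y,
-                                int width) {
-  int i;
-  for (i = 0; i < width; ++i, src += 2) {
-    int p = src[0] | (src[1] << 8);  // little-endian 16-bit pixel
-    int b = p & 0x1f, g = (p >> 5) & 0x3f, r = p >> 11;
-    b = (b << 3) | (b >> 2);
-    g = (g << 2) | (g >> 4);
-    r = (r << 3) | (r >> 2);
-    dst_y[i] = (uint8_t)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
-  }
-}
-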
-void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
- uint64_t ftmp[11];
- const uint64_t value = 0x1080108010801080;
- const uint64_t mask = 0x0001004200810019;
-  const uint64_t c0 = 0x001f001f001f001f;
-  const uint64_t c1 = 0x00ff00ff00ff00ff;
-  const uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psrlh %[r], %[src1], %[three] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[two] \n\t"
- "psrlh %[src1], %[g], %[four] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),
- [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
- : "memory");
-}
-
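-// ARGB1555ToYRow_MMI below works like the RGB565 kernel, but all three
-// fields are 5 bits: b = p & 0x1f, g = (p >> 5) & 0x1f (assembled via c1/c2)
-// and r = (p >> 10) & 0x1f (via c3); the alpha bit is ignored, each field is
-// expanded with (x << 3) | (x >> 2), and the 25/129/66 weights and 0x1080
-// bias are unchanged.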
-void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];
- const uint64_t value = 0x1080108010801080;
- const uint64_t mask = 0x0001004200810019;
-  const uint64_t c0 = 0x001f001f001f001f;
-  const uint64_t c1 = 0x00ff00ff00ff00ff;
-  const uint64_t c2 = 0x0003000300030003;
-  const uint64_t c3 = 0x007c007c007c007c;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g], %[src1], %[c2] \n\t"
- "psllh %[g], %[g], %[three] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "and %[r], %[src1], %[c3] \n\t"
- "psrlh %[r], %[r], %[two] \n\t"
- "psllh %[src0], %[b], %[three] \n\t"
- "psrlh %[src1], %[b], %[two] \n\t"
- "or %[b], %[src0], %[src1] \n\t"
- "psllh %[src0], %[g], %[three] \n\t"
- "psrlh %[src1], %[g], %[two] \n\t"
- "or %[g], %[src0], %[src1] \n\t"
- "psllh %[src0], %[r], %[three] \n\t"
- "psrlh %[src1], %[r], %[two] \n\t"
- "or %[r], %[src0], %[src1] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
- [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
- : "memory");
-}
-
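-// Scalar sketch (hypothetical helper) of the ARGB4444 luma kernel below:
-// the 4-bit fields are expanded by nibble replication, (x << 4) | x, before
-// the usual 25/129/66 weighting.
-static void ARGB4444ToYRow_Sketch(const uint8_t* src, uint8_t* dst_y,
-                                  int width) {
-  int i;
-  for (i = 0; i < width; ++i, src += 2) {
-    int p = src[0] | (src[1] << 8);
-    int b = p & 0x0f, g = (p >> 4) & 0x0f, r = (p >> 8) & 0x0f;
-    b |= b << 4;  // 4 -> 8 bits
-    g |= g << 4;
-    r |= r << 4;
-    dst_y[i] = (uint8_t)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
-  }
-}
-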
-void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
- uint8_t* dst_y,
- int width) {
- uint64_t ftmp[11];
-  const uint64_t value = 0x1080108010801080;
-  const uint64_t mask = 0x0001004200810019;
-  const uint64_t c0 = 0x000f000f000f000f;
-  const uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest0], %[src0], %[src1] \n\t"
- "psrlw %[dest0], %[dest0], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest1], %[src0], %[src1] \n\t"
- "psrlw %[dest1], %[dest1], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "psrlh %[src1], %[src0], %[eight] \n\t"
- "and %[b], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g], %[src0], %[four] \n\t"
- "and %[r], %[src1], %[c0] \n\t"
- "psllh %[src0], %[b], %[four] \n\t"
- "or %[b], %[src0], %[b] \n\t"
- "psllh %[src0], %[g], %[four] \n\t"
- "or %[g], %[src0], %[g] \n\t"
- "psllh %[src0], %[r], %[four] \n\t"
- "or %[r], %[src0], %[r] \n\t"
- "punpcklhw %[src0], %[b], %[r] \n\t"
- "punpcklhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest2], %[src0], %[src1] \n\t"
- "psrlw %[dest2], %[dest2], %[eight] \n\t"
-
- "punpckhhw %[src0], %[b], %[r] \n\t"
- "punpckhhw %[src1], %[g], %[value] \n\t"
- "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
- "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
- "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
- "paddw %[dest3], %[src0], %[src1] \n\t"
- "psrlw %[dest3], %[dest3], %[eight] \n\t"
-
- "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
- "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
-
- "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"
- "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
- "daddiu %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
- [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
- [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
- : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),
- [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
- [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
- : "memory");
-}
-
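-// RGB565ToUVRow_MMI below again averages 2x2 blocks across two rows, but
-// appears to widen the summed 5/6-bit fields toward 8-bit range in place
-// (the psllh-by-one / psrlh-by-six pair), keeps the full-width chroma
-// weights 112/74/38 (mask_u) and 112/94/18 (mask_v), pairs green with the
-// 0x2020 bias lanes, and uses pshufh with the 0x93 pattern to reorder lanes
-// ahead of the U multiply.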
-void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
- int src_stride_rgb565,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[13];
-  const uint64_t value = 0x2020202020202020;
-  const uint64_t mask_u = 0x0026004a00700002;
-  const uint64_t mask_v = 0x00020070005e0012;
-  const uint64_t mask = 0x93;
-  const uint64_t c0 = 0x001f001f001f001f;
-  const uint64_t c1 = 0x00ff00ff00ff00ff;
-  const uint64_t c2 = 0x0007000700070007;
- __asm__ volatile(
- "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest0_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest1_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest2_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest2_v], %[src0], %[c2] \n\t"
- "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
- "or %[dest2_v], %[src1], %[dest2_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "psrlh %[r0], %[dest3_u], %[three] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest3_v], %[src0], %[c2] \n\t"
- "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
- "or %[dest3_v], %[src1], %[dest3_v] \n\t"
- "psrlh %[src0], %[src0], %[three] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"
- "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
- [one] "f"(0x01)
- : "memory");
-}
-
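-// Convert 16 ARGB1555 pixels from two adjacent rows to 8 U and 8 V values
-// per iteration (2x2 box subsampling).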
-void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
- int src_stride_argb1555,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[11];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x001f001f001f001f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t c2 = 0x0003000300030003;
- uint64_t c3 = 0x007c007c007c007c;
- __asm__ volatile(
- "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest0_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest0_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest1_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest1_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest2_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest2_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest0_v], %[src0], %[c2] \n\t"
- "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
- "or %[dest0_v], %[src1], %[dest0_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[src0], %[src0], %[five] \n\t"
- "and %[g0], %[dest3_u], %[c2] \n\t"
- "psllh %[g0], %[g0], %[three] \n\t"
- "or %[g0], %[src0], %[g0] \n\t"
- "and %[r0], %[dest3_u], %[c3] \n\t"
- "psrlh %[r0], %[r0], %[two] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[src1], %[src1], %[five] \n\t"
- "and %[dest1_v], %[src0], %[c2] \n\t"
- "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
- "or %[dest1_v], %[src1], %[dest1_v] \n\t"
- "and %[src0], %[src0], %[c3] \n\t"
- "psrlh %[src0], %[src0], %[two] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[six] \n\t"
- "psllh %[r0], %[src0], %[one] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[six] \n\t"
- "psllh %[g0], %[g0], %[one] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
- "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
- "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10])
- : [src_argb1555] "r"(src_argb1555),
- [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
- [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
- [two] "f"(0x02), [one] "f"(0x01)
- : "memory");
-}
-
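-// Convert 16 ARGB4444 pixels from two adjacent rows to 8 U and 8 V values
-// per iteration (2x2 box subsampling).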
-void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
- int src_stride_argb4444,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[13];
- uint64_t value = 0x2020202020202020;
- uint64_t mask_u = 0x0026004a00700002;
- uint64_t mask_v = 0x00020070005e0012;
- uint64_t mask = 0x93;
- uint64_t c0 = 0x000f000f000f000f;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- __asm__ volatile(
- "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t"
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t"
- "psrlh %[dest0_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest0_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest0_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest0_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest0_u] \n\t"
- "paddh %[g0], %[g0], %[dest0_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest0_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t"
- "psrlh %[dest1_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest1_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest1_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest1_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest1_u] \n\t"
- "paddh %[g0], %[g0], %[dest1_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest1_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t"
- "psrlh %[dest2_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest2_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest2_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest2_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest2_u] \n\t"
- "paddh %[g0], %[g0], %[dest2_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest2_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t"
- "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t"
- "psrlh %[dest3_u], %[src0], %[eight] \n\t"
- "and %[b0], %[src0], %[c0] \n\t"
- "and %[src0], %[src0], %[c1] \n\t"
- "psrlh %[g0], %[src0], %[four] \n\t"
- "and %[r0], %[dest3_u], %[c0] \n\t"
- "psrlh %[src0], %[src1], %[eight] \n\t"
- "and %[dest3_u], %[src1], %[c0] \n\t"
- "and %[src1], %[src1], %[c1] \n\t"
- "psrlh %[dest3_v], %[src1], %[four] \n\t"
- "and %[src0], %[src0], %[c0] \n\t"
- "paddh %[b0], %[b0], %[dest3_u] \n\t"
- "paddh %[g0], %[g0], %[dest3_v] \n\t"
- "paddh %[r0], %[r0], %[src0] \n\t"
- "punpcklhw %[src0], %[b0], %[r0] \n\t"
- "punpckhhw %[src1], %[b0], %[r0] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
- "psrlh %[b0], %[src0], %[four] \n\t"
- "psllh %[r0], %[src0], %[two] \n\t"
- "or %[b0], %[b0], %[r0] \n\t"
- "psrlh %[r0], %[g0], %[four] \n\t"
- "psllh %[g0], %[g0], %[two] \n\t"
- "or %[g0], %[g0], %[r0] \n\t"
- "punpcklhw %[src0], %[g0], %[value] \n\t"
- "punpckhhw %[src1], %[g0], %[value] \n\t"
- "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
- "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
- "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
- "punpcklhw %[src0], %[b0], %[g0] \n\t"
- "punpckhhw %[src1], %[b0], %[g0] \n\t"
-
- "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
- "pshufh %[dest3_u], %[src0], %[mask] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
- "pshufh %[b0], %[src1], %[mask] \n\t"
- "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t"
- "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddiu %[width], %[width], -0x10 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
- [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
- [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
- [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
- [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
- [dest3_v] "=&f"(ftmp[12])
- : [src_argb4444] "r"(src_argb4444),
- [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u),
- [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
- [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u),
- [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04),
- [two] "f"(0x02)
- : "memory");
-}
-
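-// Convert 8 ARGB pixels to 8 U and 8 V values per iteration; every pixel
-// gets its own U and V (4:4:4, no subsampling).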
-void ARGBToUV444Row_MMI(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t ftmp[12];
- const uint64_t value = 0x4040;
- const uint64_t mask_u = 0x0026004a00700002;
- const uint64_t mask_v = 0x00020070005e0012;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
- "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
- "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
-
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
- "psubw %[dest0_u], %[src0], %[src1] \n\t"
- "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
- "psubw %[dest0_v], %[src1], %[src0] \n\t"
- "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
- "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
- "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
- "psubw %[dest1_u], %[src0], %[src1] \n\t"
- "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
- "psubw %[dest1_v], %[src1], %[src0] \n\t"
- "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
- "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
- "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
- "psubw %[dest2_u], %[src0], %[src1] \n\t"
- "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
- "psubw %[dest2_v], %[src1], %[src0] \n\t"
- "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
-
- "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t"
- "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t"
- "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
- "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
- "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t"
- "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
- "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t"
- "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
- "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
- "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
- "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
- "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
- "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
-
- "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
- "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
- "psubw %[dest3_u], %[src0], %[src1] \n\t"
- "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
- "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
- "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
- "psubw %[dest3_v], %[src1], %[src0] \n\t"
- "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
-
- "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
- "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
- "packushb %[dest0_u], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
- "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
-
- "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
- "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
- "packushb %[dest0_v], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
- "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
-
- "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
- "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
- "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bgtz %[width], 1b \n\t"
- : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
- [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
- [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
- [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
- [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
- [dest3_v] "=&f"(ftmp[11])
- : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
- [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),
- [eight] "f"(0x08)
- : "memory");
-}
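-
-// A minimal scalar sketch of the U/V math the kernel above implements
-// (illustrative only; the function below is not part of this file). mask_u
-// and mask_v hold the BT.601 coefficients and 'value' supplies the 0x8080
-// bias (0x4040 counted twice by pmaddhw):
-static void ARGBToUV444Row_Sketch(const uint8_t* src_argb,
-                                  uint8_t* dst_u,
-                                  uint8_t* dst_v,
-                                  int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int b = src_argb[0];
-    int g = src_argb[1];
-    int r = src_argb[2];
-    dst_u[i] = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
-    dst_v[i] = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
-    src_argb += 4;  // Bytes are B, G, R, A.
-  }
-}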
-
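-// Convert a row of ARGB to grayscale; alpha is preserved.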
-void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
- uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x01;
- const uint64_t mask2 = 0x0080004D0096001DULL;
- const uint64_t mask3 = 0xFF000000FF000000ULL;
- const uint64_t mask4 = ~mask3;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "and %[src37], %[src], %[mask3] \n\t"
-
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
- "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"
-
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
- "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
- "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
- "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask4] \n\t"
- "or %[dest], %[dest], %[src37] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
- [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
- [src37] "=&f"(src37)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
- : "memory");
-}
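-
-// A scalar sketch of the gray math above (illustrative only; the function
-// below is not part of this file). mask2 packs the 29/150/77 luma
-// coefficients plus the +128 rounding term, and 'shift' is 8:
-static void ARGBGrayRow_Sketch(const uint8_t* src_argb,
-                               uint8_t* dst_argb,
-                               int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    uint8_t y = (uint8_t)((29 * src_argb[0] + 150 * src_argb[1] +
-                           77 * src_argb[2] + 128) >> 8);
-    dst_argb[0] = dst_argb[1] = dst_argb[2] = y;  // B, G and R become luma.
-    dst_argb[3] = src_argb[3];                    // Alpha passes through.
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}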
-
-// Convert a row of an image to sepia tone; alpha is preserved.
-void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
- uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
- uint64_t tmp0, tmp1;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x002300440011ULL;
- const uint64_t mask2 = 0x002D00580016ULL;
- const uint64_t mask3 = 0x003200620018ULL;
- const uint64_t mask4 = 0xFF000000FF000000ULL;
- const uint64_t shift = 0x07;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "and %[dest37], %[dest], %[mask4] \n\t"
-
- "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
- "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
- "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
- "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
-
- "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
- "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
- "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
- "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
- "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
- "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
- "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "or %[dest], %[dest], %[dest37] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
- [dest] "=&f"(dest)
- : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
- [mask4] "f"(mask4), [shift] "f"(shift)
- : "memory");
-}
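-
-// A scalar sketch of the sepia math above (illustrative only; the function
-// below is not part of this file). mask1/mask2/mask3 pack the blue, green
-// and red coefficient rows, 'shift' is 7, and packushb supplies the clamp:
-static void ARGBSepiaRow_Sketch(uint8_t* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int b = dst_argb[0];
-    int g = dst_argb[1];
-    int r = dst_argb[2];
-    int sb = (b * 17 + g * 68 + r * 35) >> 7;  // Cannot exceed 255.
-    int sg = (b * 22 + g * 88 + r * 45) >> 7;
-    int sr = (b * 24 + g * 98 + r * 50) >> 7;
-    dst_argb[0] = (uint8_t)sb;
-    dst_argb[1] = (uint8_t)(sg > 255 ? 255 : sg);
-    dst_argb[2] = (uint8_t)(sr > 255 ? 255 : sr);
-    dst_argb += 4;  // Alpha (dst_argb[3]) is left untouched.
-  }
-}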
-
-// Apply a color matrix to a row of an image. The matrix is signed.
-// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- const int8_t* matrix_argb,
- int width) {
- uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
- dest3;
- uint64_t matrix, matrix_hi, matrix_lo;
- uint64_t tmp0, tmp1;
- const uint64_t shift0 = 0x06;
- const uint64_t shift1 = 0x08;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
-
- "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest0], %[dest0], %[shift0] \n\t"
-
- "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest1], %[dest1], %[shift0] \n\t"
-
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-
- "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest2], %[dest2], %[shift0] \n\t"
-
- "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
- "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
- "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
- "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
- "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
- "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
- "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
- "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
- "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
- "psraw %[dest3], %[dest3], %[shift0] \n\t"
-
- "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
- "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
- "packushb %[dest], %[tmp0], %[tmp1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
- [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
- [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
- : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb),
- [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0),
- [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1)
- : "memory");
-}
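-
-// A scalar sketch of the color-matrix math above (illustrative only; the
-// function below is not part of this file). Each output channel is a signed
-// dot product of the B,G,R,A inputs with one 4-entry row of the matrix,
-// shifted right by 6 (shift0) and clamped by packsswh/packushb:
-static void ARGBColorMatrixRow_Sketch(const uint8_t* src_argb,
-                                      uint8_t* dst_argb,
-                                      const int8_t* matrix_argb,
-                                      int width) {
-  int i, j;
-  for (i = 0; i < width; ++i) {
-    for (j = 0; j < 4; ++j) {
-      int v = (src_argb[0] * matrix_argb[j * 4 + 0] +
-               src_argb[1] * matrix_argb[j * 4 + 1] +
-               src_argb[2] * matrix_argb[j * 4 + 2] +
-               src_argb[3] * matrix_argb[j * 4 + 3]) >> 6;
-      dst_argb[j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
-    }
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}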
-
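-// Scale each channel of a row of ARGB by the matching byte of 'value',
-// roughly c * s / 255, so a scale byte of 0xFF leaves the channel unchanged.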
-void ARGBShadeRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- int width,
- uint32_t value) {
- uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- // Expand the scale bytes once, before the loop; re-expanding %[value]
- // on every iteration would corrupt it after the first pass.
- "punpcklbh %[value], %[value], %[value] \n\t"
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[src] \n\t"
- "punpckhbh %[src_hi], %[src], %[src] \n\t"
-
- "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
- [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
- [value] "f"(value), [shift] "f"(shift)
- : "memory");
-}
-
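-// Multiply two rows of ARGB together per channel, roughly c0 * c1 / 255.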
-void ARGBMultiplyRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
- uint64_t dest, dest_lo, dest_hi;
- const uint64_t mask = 0x0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
- "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"
-
- "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
- "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
- [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
- : "memory");
-}
-
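-// Add two rows of ARGB per channel with unsigned saturation.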
-void ARGBAddRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "paddusb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width)
- : "memory");
-}
-
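-// Subtract one row of ARGB from another per channel with unsigned
-// saturation.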
-void ARGBSubtractRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "psubusb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [width] "r"(width)
- : "memory");
-}
-
-// Sobel functions which mimic SSSE3.
-void SobelXRow_MMI(const uint8_t* src_y0,
- const uint8_t* src_y1,
- const uint8_t* src_y2,
- uint8_t* dst_sobelx,
- int width) {
- uint64_t y00 = 0, y10 = 0, y20 = 0;
- uint64_t y02 = 0, y12 = 0, y22 = 0;
- uint64_t zero = 0x0;
- uint64_t sobel = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
- "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2]
- "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i]
- "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2]
- "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-
- "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i]
- "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
- "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2]
- "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y20], %[y20], %[zero] \n\t"
-
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
- "punpcklbh %[y22], %[y22], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y10] \n\t" // a+b
- "paddh %[y20], %[y20], %[y10] \n\t" // c+b
- "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c
-
- "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub
- "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub
- "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub
-
- "pmaxsh %[y10], %[y00], %[y02] \n\t"
- "pminsh %[y20], %[y00], %[y02] \n\t"
- "psubh %[sobel], %[y10], %[y20] \n\t" // Abs
-
- "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
- "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
- "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
- "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
- "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-
- "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
- "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
- "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
- "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y20], %[y20], %[zero] \n\t"
-
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
- "punpcklbh %[y22], %[y22], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y10] \n\t"
- "paddh %[y20], %[y20], %[y10] \n\t"
- "paddh %[y00], %[y00], %[y20] \n\t"
-
- "paddh %[y02], %[y02], %[y12] \n\t"
- "paddh %[y22], %[y22], %[y12] \n\t"
- "paddh %[y02], %[y02], %[y22] \n\t"
-
- "pmaxsh %[y10], %[y00], %[y02] \n\t"
- "pminsh %[y20], %[y00], %[y02] \n\t"
- "psubh %[y00], %[y10], %[y20] \n\t"
-
- "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
- "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
- "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
-
- "daddiu %[src_y0], %[src_y0], 8 \n\t"
- "daddiu %[src_y1], %[src_y1], 8 \n\t"
- "daddiu %[src_y2], %[src_y2], 8 \n\t"
- "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
- [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
- : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
- [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
- : "memory");
-}
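-
-// A scalar sketch of the SobelX tap above (illustrative only; the function
-// below is not part of this file). Two 1-2-1 weighted columns two pixels
-// apart are differenced; packushb clamps the absolute value to 255:
-static void SobelXRow_Sketch(const uint8_t* src_y0,
-                             const uint8_t* src_y1,
-                             const uint8_t* src_y2,
-                             uint8_t* dst_sobelx,
-                             int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int left = src_y0[i] + 2 * src_y1[i] + src_y2[i];
-    int right = src_y0[i + 2] + 2 * src_y1[i + 2] + src_y2[i + 2];
-    int sobel = left > right ? left - right : right - left;  // Abs
-    dst_sobelx[i] = (uint8_t)(sobel > 255 ? 255 : sobel);    // clamp255
-  }
-}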
-
-void SobelYRow_MMI(const uint8_t* src_y0,
- const uint8_t* src_y1,
- uint8_t* dst_sobely,
- int width) {
- uint64_t y00 = 0, y01 = 0, y02 = 0;
- uint64_t y10 = 0, y11 = 0, y12 = 0;
- uint64_t zero = 0x0;
- uint64_t sobel = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
- "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
- "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1]
- "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2]
- "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i]
- "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
- "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1]
- "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2]
- "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y01], %[y01], %[zero] \n\t"
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
-
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y11], %[y11], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y01] \n\t" // a+b
- "paddh %[y02], %[y02], %[y01] \n\t" // c+b
- "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c
-
- "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub
- "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub
- "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub
-
- "pmaxsh %[y02], %[y00], %[y10] \n\t"
- "pminsh %[y12], %[y00], %[y10] \n\t"
- "psubh %[sobel], %[y02], %[y12] \n\t" // Abs
-
- "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
- "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
- "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
- "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
- "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
- "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
-
- "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
- "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
- "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
- "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
- "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
- "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
-
- "punpcklbh %[y00], %[y00], %[zero] \n\t"
- "punpcklbh %[y01], %[y01], %[zero] \n\t"
- "punpcklbh %[y02], %[y02], %[zero] \n\t"
-
- "punpcklbh %[y10], %[y10], %[zero] \n\t"
- "punpcklbh %[y11], %[y11], %[zero] \n\t"
- "punpcklbh %[y12], %[y12], %[zero] \n\t"
-
- "paddh %[y00], %[y00], %[y01] \n\t"
- "paddh %[y02], %[y02], %[y01] \n\t"
- "paddh %[y00], %[y00], %[y02] \n\t"
-
- "paddh %[y10], %[y10], %[y11] \n\t"
- "paddh %[y12], %[y12], %[y11] \n\t"
- "paddh %[y10], %[y10], %[y12] \n\t"
-
- "pmaxsh %[y02], %[y00], %[y10] \n\t"
- "pminsh %[y12], %[y00], %[y10] \n\t"
- "psubh %[y00], %[y02], %[y12] \n\t"
-
- "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
- "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
- "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
-
- "daddiu %[src_y0], %[src_y0], 8 \n\t"
- "daddiu %[src_y1], %[src_y1], 8 \n\t"
- "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
- [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
- : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
- [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
- : "memory");
-}
-
-void SobelRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_argb,
- int width) {
- double temp[3];
- uint64_t c1 = 0xff000000ff000000;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i]
- "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
- "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
- // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
- "paddusb %[t2] , %[t0], %[t1] \n\t"
-
- // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
- "punpcklbh %[t0], %[t2], %[t2] \n\t"
-
-      // s1 s1 s0 s0->s1 s1 s1 s1 s0 s0 s0 s0
- "punpcklbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
-      // 255 s1 s1 s1 255 s0 s0 s0
- "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
-
- // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
- "punpckhbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- // 255 s3 s3 s3 255 s2 s2 s2
- "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
-
- // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
- "punpckhbh %[t0], %[t2], %[t2] \n\t"
-
- // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
- "punpcklbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
-
- // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
- "punpckhbh %[t1], %[t0], %[t0] \n\t"
- "or %[t1], %[t1], %[c1] \n\t"
- "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
- "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
-
- "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
- : "memory");
-}
-
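SobelRow_MMI combines the two gradients with a saturating add and splats the result across B/G/R with alpha forced opaque (the 0xff000000 constant). A scalar sketch of the same per-pixel packing, name hypothetical:

#include <stdint.h>

static void SobelRow_Sketch(const uint8_t* src_sobelx,
                            const uint8_t* src_sobely,
                            uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    int s = src_sobelx[i] + src_sobely[i];      /* paddusb saturates */
    uint8_t v = (uint8_t)(s > 255 ? 255 : s);
    dst_argb[4 * i + 0] = v;                    /* B */
    dst_argb[4 * i + 1] = v;                    /* G */
    dst_argb[4 * i + 2] = v;                    /* R */
    dst_argb[4 * i + 3] = 255;                  /* A */
  }
}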
-void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_y,
- int width) {
- uint64_t tr = 0;
- uint64_t tb = 0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
- "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
- "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
- "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
- "paddusb %[tr], %[tr], %[tb] \n\t" // g
- "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
-
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [tr] "=&f"(tr), [tb] "=&f"(tb)
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_y] "r"(dst_y), [width] "r"(width)
- : "memory");
-}
-
-void SobelXYRow_MMI(const uint8_t* src_sobelx,
- const uint8_t* src_sobely,
- uint8_t* dst_argb,
- int width) {
- uint64_t temp[3];
- uint64_t result = 0;
- uint64_t gb = 0;
- uint64_t cr = 0;
- uint64_t c1 = 0xffffffffffffffff;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i]
- "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
- "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
- "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
- "paddusb %[tg] , %[tr], %[tb] \n\t" // g
-
- // g3 b3 g2 b2 g1 b1 g0 b0
- "punpcklbh %[gb], %[tb], %[tg] \n\t"
-      // c3 r3 c2 r2 c1 r1 c0 r0
- "punpcklbh %[cr], %[tr], %[c1] \n\t"
- // c1 r1 g1 b1 c0 r0 g0 b0
- "punpcklhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
- // c3 r3 g3 b3 c2 r2 g2 b2
- "punpckhhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
-
- // g7 b7 g6 b6 g5 b5 g4 b4
- "punpckhbh %[gb], %[tb], %[tg] \n\t"
- // c7 r7 c6 r6 c5 r5 c4 r4
- "punpckhbh %[cr], %[tr], %[c1] \n\t"
- // c5 r5 g5 b5 c4 r4 g4 b4
- "punpcklhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
- // c7 r7 g7 b7 c6 r6 g6 b6
- "punpckhhw %[result], %[gb], %[cr] \n\t"
- "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
- "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
-
- "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
- "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
- "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]),
- [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result)
- : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
- [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
- : "memory");
-}
-
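SobelXYRow keeps the gradients in separate channels rather than a grey pixel: reading the unpack sequence above, B carries Sobel Y, R carries Sobel X, and G carries the saturated sum. A scalar sketch, name hypothetical:

#include <stdint.h>

static void SobelXYRow_Sketch(const uint8_t* src_sobelx,
                              const uint8_t* src_sobely,
                              uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    int g = src_sobelx[i] + src_sobely[i];               /* paddusb */
    dst_argb[4 * i + 0] = src_sobely[i];                 /* B = Sobel Y */
    dst_argb[4 * i + 1] = (uint8_t)(g > 255 ? 255 : g);  /* G = combined */
    dst_argb[4 * i + 2] = src_sobelx[i];                 /* R = Sobel X */
    dst_argb[4 * i + 3] = 255;                           /* A */
  }
}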
-void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
- // Copy a Y to RGB.
- uint64_t src, dest;
- const uint64_t mask0 = 0x00ffffff00ffffffULL;
- const uint64_t mask1 = ~mask0;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src], %[src], %[src] \n\t"
- "punpcklhw %[dest], %[src], %[src] \n\t"
- "and %[dest], %[dest], %[mask0] \n\t"
- "or %[dest], %[dest], %[mask1] \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
-
- "punpckhhw %[dest], %[src], %[src] \n\t"
- "and %[dest], %[dest], %[mask0] \n\t"
- "or %[dest], %[dest], %[mask1] \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-// TODO - respect YuvConstants
-void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
- const struct YuvConstants*, int width) {
- uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x55;
- const uint64_t mask2 = 0xAA;
- const uint64_t mask3 = 0xFF;
- const uint64_t mask4 = 0x4A354A354A354A35ULL;
- const uint64_t mask5 = 0x0488048804880488ULL;
- const uint64_t shift0 = 0x08;
- const uint64_t shift1 = 0x06;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
-
- "pshufh %[src], %[src_lo], %[mask0] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_lo], %[mask1] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_lo], %[mask2] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_lo], %[mask3] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_hi], %[mask0] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_hi], %[mask1] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
-
- "pshufh %[src], %[src_hi], %[mask2] \n\t"
- "psllh %[dest_lo], %[src], %[shift0] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
- "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
- "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
- "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
- "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
- "pshufh %[src], %[src_hi], %[mask3] \n\t"
- "psllh %[dest_hi], %[src], %[shift0] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
- "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
- "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
- "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
- "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo)
- : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
- [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0),
- [shift1] "f"(shift1), [width] "r"(width)
- : "memory");
-}
-
-void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, src0, src1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x1b;
-
- src += width - 1;
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
- "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
- "punpcklbh %[src0], %[source], %[mask0] \n\t"
- "pshufh %[src0], %[src0], %[mask1] \n\t"
- "punpckhbh %[src1], %[source], %[mask0] \n\t"
- "pshufh %[src1], %[src1], %[mask1] \n\t"
- "packushb %[dest], %[src1], %[src0] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
- [src1] "=&f"(src1)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
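MirrorRow reverses a byte row 8 pixels at a time (pshufh with immediate 0x1b reverses the four halfwords of each unpacked half). The scalar equivalent is simply (name hypothetical):

#include <stdint.h>

static void MirrorRow_Sketch(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) {
    dst[i] = src[width - 1 - i];
  }
}

The MMI loop assumes width is a multiple of 8; the any-width wrappers are expected to handle the remainder.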
-void MirrorSplitUVRow_MMI(const uint8_t* src_uv,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t src0, src1, dest0, dest1;
- const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
- const uint64_t mask1 = 0x1b;
- const uint64_t shift = 0x08;
-
- src_uv += (width - 1) << 1;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
- "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
-
- "and %[dest0], %[src0], %[mask0] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "and %[dest1], %[src1], %[mask0] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
-
- "psrlh %[dest0], %[src0], %[shift] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "psrlh %[dest1], %[src1], %[shift] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
-
- "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
- "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
- "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
- [src1] "=&f"(src1)
- : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v),
- [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [shift] "f"(shift)
- : "memory");
-}
-
-void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- src += (width - 1) * 4;
- uint64_t temp = 0x0;
- uint64_t shuff = 0x4e; // 01 00 11 10
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[temp], 3(%[src]) \n\t"
- "gsldrc1 %[temp], -4(%[src]) \n\t"
- "pshufh %[temp], %[temp], %[shuff] \n\t"
- "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
- "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
-
- "daddiu %[src], %[src], -0x08 \n\t"
- "daddiu %[dst], %[dst], 0x08 \n\t"
- "daddiu %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [temp] "=&f"(temp)
- : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff)
- : "memory");
-}
-
-void SplitUVRow_MMI(const uint8_t* src_uv,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t c0 = 0x00ff00ff00ff00ff;
- uint64_t temp[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
-
- "and %[t2], %[t0], %[c0] \n\t"
- "and %[t3], %[t1], %[c0] \n\t"
- "packushb %[t2], %[t2], %[t3] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
-
- "psrlh %[t2], %[t0], %[shift] \n\t"
- "psrlh %[t3], %[t1], %[shift] \n\t"
- "packushb %[t2], %[t2], %[t3] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
-
- "daddiu %[src_uv], %[src_uv], 16 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [t3] "=&f"(temp[3])
- : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
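SplitUVRow de-interleaves UV pairs: the 0x00ff mask keeps the even (U) bytes and psrlh by 8 exposes the odd (V) bytes before both are repacked. A scalar sketch, name hypothetical:

#include <stdint.h>

static void SplitUVRow_Sketch(const uint8_t* src_uv, uint8_t* dst_u,
                              uint8_t* dst_v, int width) {
  for (int i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0];  /* even bytes: and with 0x00ff */
    dst_v[i] = src_uv[2 * i + 1];  /* odd bytes: psrlh by 8 */
  }
}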
-void MergeUVRow_MMI(const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_uv,
- int width) {
- uint64_t temp[3];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
- "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
- "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
- "punpcklbh %[t2], %[t0], %[t1] \n\t"
- "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
- "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
- "punpckhbh %[t2], %[t0], %[t1] \n\t"
- "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
- "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
-
- "daddiu %[src_u], %[src_u], 8 \n\t"
- "daddiu %[src_v], %[src_v], 8 \n\t"
- "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
- : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [width] "r"(width)
- : "memory");
-}
-
-void SplitRGBRow_MMI(const uint8_t* src_rgb,
- uint8_t* dst_r,
- uint8_t* dst_g,
- uint8_t* dst_b,
- int width) {
- uint64_t src[4];
- uint64_t dest_hi, dest_lo, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
- "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
- "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
- "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
- "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
- "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
- "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
-
- "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
- "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
- "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
- "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
- "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
- [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g),
- [dstb_ptr] "r"(dst_b), [width] "r"(width)
- : "memory");
-}
-
-void MergeRGBRow_MMI(const uint8_t* src_r,
- const uint8_t* src_g,
- const uint8_t* src_b,
- uint8_t* dst_rgb,
- int width) {
- uint64_t srcr, srcg, srcb, dest;
- uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
- const uint64_t temp = 0x0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
- "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
- "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
- "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
- "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
- "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
-
- "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
- "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
- "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
- "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
-
- "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
- "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
- "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
- "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
- "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
- "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
- "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
- "punpckhwd %[dest], %[dest], %[dest] \n\t"
- "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
- "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
-
- "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
- "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
- "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
- [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
- [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
- [srcbz_lo] "=&f"(srcbz_lo)
- : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b),
- [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp)
- : "memory");
-}
-
-// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
- int src_stride_yuy2,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t c0 = 0xff00ff00ff00ff00;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t temp[3];
- uint64_t data[4];
- uint64_t shift = 0x08;
- uint64_t src_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
- "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c1] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
- "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c1] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
- [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
- : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
- : "memory");
-}
-
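Assuming the standard YUY2 byte order (Y0 U Y1 V), the 420 path above first averages two vertically adjacent rows with pavgb (a rounded average) and then extracts the U and V bytes. A scalar sketch, name hypothetical:

#include <stdint.h>

static void YUY2ToUVRow_Sketch(const uint8_t* src_yuy2, int stride,
                               uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next = src_yuy2 + stride;
  for (int i = 0; i < width; i += 2) {  /* one U/V pair per 2 pixels */
    dst_u[i / 2] = (uint8_t)((src_yuy2[2 * i + 1] + next[2 * i + 1] + 1) >> 1);
    dst_v[i / 2] = (uint8_t)((src_yuy2[2 * i + 3] + next[2 * i + 3] + 1) >> 1);
  }
}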
-// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- uint64_t c0 = 0xff00ff00ff00ff00;
- uint64_t c1 = 0x00ff00ff00ff00ff;
- uint64_t temp[2];
- uint64_t data[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c1] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "psrlh %[t0], %[t0], %[shift] \n\t"
- "psrlh %[t1], %[t1], %[shift] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c1] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
- [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
- uint64_t c0 = 0x00ff00ff00ff00ff;
- uint64_t temp[2];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
- "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
- : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width),
- [c0] "f"(c0)
- : "memory");
-}
-
-// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
- int src_stride_uyvy,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- // Output a row of UV values.
- uint64_t c0 = 0x00ff00ff00ff00ff;
- uint64_t temp[3];
- uint64_t data[4];
- uint64_t shift = 0x08;
- uint64_t src_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
- "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c0] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
- "pavgb %[t0], %[t0], %[t1] \n\t"
-
- "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
- "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
- "pavgb %[t1], %[t2], %[t1] \n\t"
-
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c0] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
- [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
- [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
- : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy),
- [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
- [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- // Output a row of UV values.
- uint64_t c0 = 0x00ff00ff00ff00ff;
- uint64_t temp[2];
- uint64_t data[4];
- uint64_t shift = 0x08;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d0], %[t0], %[c0] \n\t"
- "psrlh %[d1], %[t1], %[shift] \n\t"
-
- "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "mov.s %[t1], %[t0] \n\t"
- "and %[d2], %[t0], %[c0] \n\t"
- "psrlh %[d3], %[t1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d2] \n\t"
- "packushb %[d1], %[d1], %[d3] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
- "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
- "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
- "daddiu %[dst_u], %[dst_u], 8 \n\t"
- "daddiu %[dst_v], %[dst_v], 8 \n\t"
- "daddiu %[width], %[width], -16 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
- [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
- [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
- // Output a row of Y values.
- uint64_t c0 = 0x00ff00ff00ff00ff;
- uint64_t shift = 0x08;
- uint64_t temp[2];
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
- "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
- "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
- "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
- "dsrl %[t0], %[t0], %[shift] \n\t"
- "dsrl %[t1], %[t1], %[shift] \n\t"
- "and %[t0], %[t0], %[c0] \n\t"
- "and %[t1], %[t1], %[c0] \n\t"
- "packushb %[t0], %[t0], %[t1] \n\t"
- "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
- "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
- "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
- "daddiu %[dst_y], %[dst_y], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
- : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width),
- [c0] "f"(c0), [shift] "f"(shift)
- : "memory");
-}
-
-// Blend src_argb over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb or src_argb1.
-// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_MMI(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
- dest_lo;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;
- const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
- const uint64_t mask3 = 0xFF;
- const uint64_t mask4 = ~mask1;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
-
- "psubush %[alpha], %[mask2], %[src0_lo] \n\t"
- "pshufh %[alpha], %[alpha], %[mask3] \n\t"
- "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t"
-
- "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-
- "psubush %[alpha], %[mask2], %[src0_hi] \n\t"
- "pshufh %[alpha], %[alpha], %[mask3] \n\t"
- "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[mask4] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
- [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo)
- : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
- [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4),
- [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
-
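The blend above is the alpha "over" operator in 8-bit fixed point: foreground plus background scaled by the complement of the foreground alpha (psubush against the 0x00FF mask yields 255 - a), with the result alpha forced opaque. A scalar sketch of the same math, name hypothetical:

#include <stdint.h>

static void ARGBBlendRow_Sketch(const uint8_t* src_argb,
                                const uint8_t* src_argb1,
                                uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t a = src_argb[4 * i + 3];
    for (int c = 0; c < 3; ++c) {  /* B, G, R */
      uint32_t v = src_argb[4 * i + c] +
                   ((src_argb1[4 * i + c] * (255 - a)) >> 8);
      dst_argb[4 * i + c] = (uint8_t)(v > 255 ? 255 : v);  /* paddush */
    }
    dst_argb[4 * i + 3] = 255;  /* alpha forced by the mask4 or */
  }
}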
-void BlendPlaneRow_MMI(const uint8_t* src0,
- const uint8_t* src1,
- const uint8_t* alpha,
- uint8_t* dst,
- int width) {
- uint64_t source0, source1, dest, alph;
- uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
- dest_lo;
- uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;
- const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
- "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
-
- "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
- "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
- "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
-
- "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t"
- "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t"
- "psubusb %[alpha_r], %[mask1], %[alpha] \n\t"
- "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t"
- "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t"
- "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t"
- "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t"
-
- "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t"
- "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[dest] \n\t"
- "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
-
- "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t"
- "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[dest] \n\t"
- "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
- [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
- [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
- [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
- [alpha_r] "=&f"(alpha_rev)
- : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha),
- [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
-
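BlendPlaneRow is a straight per-byte linear blend; the paddush of mask2 (0x00FF per lane) adds 255 for rounding before the shift. A scalar sketch, name hypothetical:

#include <stdint.h>

static void BlendPlaneRow_Sketch(const uint8_t* src0, const uint8_t* src1,
                                 const uint8_t* alpha, uint8_t* dst,
                                 int width) {
  for (int i = 0; i < width; ++i) {
    dst[i] = (uint8_t)((src0[i] * alpha[i] +
                        src1[i] * (255 - alpha[i]) + 255) >> 8);
  }
}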
-// Multiply source RGB by alpha and store to destination.
-// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- int width) {
- uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
- const uint64_t mask0 = 0xFF;
- const uint64_t mask1 = 0xFF000000FF000000ULL;
- const uint64_t mask2 = ~mask1;
- const uint64_t shift = 0x08;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[src] \n\t"
- "punpckhbh %[src_hi], %[src], %[src] \n\t"
-
- "pshufh %[alpha], %[src_lo], %[mask0] \n\t"
- "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
- "pshufh %[alpha], %[src_hi], %[mask0] \n\t"
- "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "and %[dest], %[dest], %[mask2] \n\t"
- "and %[src], %[src], %[mask1] \n\t"
- "or %[dest], %[dest], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift),
- [width] "r"(width)
- : "memory");
-}
-
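The attenuate trick: punpcklbh of a byte with itself puts c*0x0101 (= c*257) in each 16-bit lane, so pmulhuh of two such lanes followed by the extra psrlh approximates c*a/255 without a divide. A scalar sketch of that rounding behavior, name hypothetical:

#include <stdint.h>

static void ARGBAttenuateRow_Sketch(const uint8_t* src_argb,
                                    uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t a = src_argb[4 * i + 3];
    for (int c = 0; c < 3; ++c) {
      uint32_t v = src_argb[4 * i + c];
      /* ((v*257) * (a*257)) >> 24 approximates v * a / 255 */
      dst_argb[4 * i + c] = (uint8_t)(((v * 257) * (a * 257)) >> 24);
    }
    dst_argb[4 * i + 3] = (uint8_t)a;  /* alpha passes through via mask1 */
  }
}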
-void ComputeCumulativeSumRow_MMI(const uint8_t* row,
- int32_t* cumsum,
- const int32_t* previous_cumsum,
- int width) {
- int64_t row_sum[2] = {0, 0};
- uint64_t src, dest0, dest1, presrc0, presrc1, dest;
- const uint64_t mask = 0x0;
-
- __asm__ volatile(
- "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t"
- "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t"
- "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t"
-
- "punpcklbh %[src], %[src], %[mask] \n\t"
- "punpcklhw %[dest0], %[src], %[mask] \n\t"
- "punpckhhw %[dest1], %[src], %[mask] \n\t"
-
- "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t"
- "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t"
-
- "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t"
- "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t"
- "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t"
- "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t"
-
- "paddw %[dest0], %[row_sum0], %[presrc0] \n\t"
- "paddw %[dest1], %[row_sum1], %[presrc1] \n\t"
-
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t"
- "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x01 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]),
- [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0),
- [presrc1] "=&f"(presrc1)
- : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum),
- [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask)
- : "memory");
-}
-
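Each iteration of the loop above consumes one ARGB pixel; the four running channel sums live across two 64-bit registers as 2x32-bit lanes. A scalar sketch, name hypothetical:

#include <stdint.h>

static void ComputeCumulativeSumRow_Sketch(const uint8_t* row, int32_t* cumsum,
                                           const int32_t* previous_cumsum,
                                           int width) {
  int32_t row_sum[4] = {0, 0, 0, 0};
  for (int x = 0; x < width; ++x) {  /* width counts pixels */
    for (int c = 0; c < 4; ++c) {
      row_sum[c] += row[4 * x + c];
      cumsum[4 * x + c] = row_sum[c] + previous_cumsum[4 * x + c];
    }
  }
}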
-// Bilinear filter 8x2 -> 8x1: blend two source rows into one.
-void InterpolateRow_MMI(uint8_t* dst_ptr,
- const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- int width,
- int source_y_fraction) {
- if (source_y_fraction == 0) {
- __asm__ volatile(
- "1: \n\t"
- "ld $t0, 0x0(%[src_ptr]) \n\t"
- "sd $t0, 0x0(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- :
- : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width)
- : "memory");
- return;
- }
- if (source_y_fraction == 128) {
- uint64_t uv = 0x0;
- uint64_t uv_stride = 0x0;
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
- "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
- "daddu $t0, %[src_ptr], %[stride] \n\t"
- "gsldrc1 %[uv_stride], 0x0($t0) \n\t"
- "gsldlc1 %[uv_stride], 0x7($t0) \n\t"
-
- "pavgb %[uv], %[uv], %[uv_stride] \n\t"
- "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
- "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width),
- [stride] "r"((int64_t)src_stride)
- : "memory");
- return;
- }
- const uint8_t* src_ptr1 = src_ptr + src_stride;
- uint64_t temp;
- uint64_t data[4];
- uint64_t zero = 0x0;
- uint64_t c0 = 0x0080008000800080;
- uint64_t fy0 = 0x0100010001000100;
- uint64_t shift = 0x8;
- __asm__ volatile(
- "pshufh %[fy1], %[fy1], %[zero] \n\t"
- "psubh %[fy0], %[fy0], %[fy1] \n\t"
- "1: \n\t"
- "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
- "punpcklbh %[d0], %[t0], %[zero] \n\t"
- "punpckhbh %[d1], %[t0], %[zero] \n\t"
- "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
- "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
- "punpcklbh %[d2], %[t0], %[zero] \n\t"
- "punpckhbh %[d3], %[t0], %[zero] \n\t"
-
- "pmullh %[d0], %[d0], %[fy0] \n\t"
- "pmullh %[d2], %[d2], %[fy1] \n\t"
- "paddh %[d0], %[d0], %[d2] \n\t"
- "paddh %[d0], %[d0], %[c0] \n\t"
- "psrlh %[d0], %[d0], %[shift] \n\t"
-
- "pmullh %[d1], %[d1], %[fy0] \n\t"
- "pmullh %[d3], %[d3], %[fy1] \n\t"
- "paddh %[d1], %[d1], %[d3] \n\t"
- "paddh %[d1], %[d1], %[c0] \n\t"
- "psrlh %[d1], %[d1], %[shift] \n\t"
-
- "packushb %[d0], %[d0], %[d1] \n\t"
- "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
- "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
- "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
- "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
- : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
- [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
- : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1),
- [dst_ptr] "r"(dst_ptr), [width] "r"(width),
- [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0),
- [shift] "f"(shift), [zero] "f"(zero)
- : "memory");
-}
-
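InterpolateRow special-cases fraction 0 (plain copy) and 128 (pavgb rounded average), and otherwise performs the fixed-point lerp (src0*(256-f) + src1*f + 128) >> 8 seen in the third asm body. A scalar sketch, name hypothetical:

#include <stddef.h>
#include <stdint.h>

static void InterpolateRow_Sketch(uint8_t* dst, const uint8_t* src,
                                  ptrdiff_t stride, int width, int fraction) {
  const uint8_t* src1 = src + stride;
  if (fraction == 0) {           /* fast path: copy */
    for (int i = 0; i < width; ++i) dst[i] = src[i];
  } else if (fraction == 128) {  /* fast path: rounded average */
    for (int i = 0; i < width; ++i)
      dst[i] = (uint8_t)((src[i] + src1[i] + 1) >> 1);
  } else {
    for (int i = 0; i < width; ++i)
      dst[i] = (uint8_t)((src[i] * (256 - fraction) +
                          src1[i] * fraction + 128) >> 8);
  }
}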
-// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_argb,
- const uint8_t* shuffler,
- int width) {
- uint64_t source, dest0, dest1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
- ((shuffler[2] & 0x03) << 4) |
- ((shuffler[3] & 0x03) << 6);
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest0], %[src], %[mask0] \n\t"
- "pshufh %[dest0], %[dest0], %[mask1] \n\t"
- "punpckhbh %[dest1], %[src], %[mask0] \n\t"
- "pshufh %[dest1], %[dest1], %[mask1] \n\t"
- "packushb %[dest], %[dest0], %[dest1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
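Because the shuffle is realized as a single pshufh immediate, only reorderings within one 4-byte pixel are expressible, hence the & 0x03 applied to each shuffler byte. A scalar sketch, name hypothetical:

#include <stdint.h>

static void ARGBShuffleRow_Sketch(const uint8_t* src_argb, uint8_t* dst_argb,
                                  const uint8_t* shuffler, int width) {
  for (int i = 0; i < width; ++i) {
    for (int c = 0; c < 4; ++c) {
      dst_argb[4 * i + c] = src_argb[4 * i + (shuffler[c] & 3)];
    }
  }
}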
-void I422ToYUY2Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_frame,
- int width) {
- uint64_t temp[3];
- uint64_t vu = 0x0;
- __asm__ volatile(
- "1: \n\t"
-      "gsldlc1   %[ty], 0x7(%[src_y])           \n\t"  // load 8 Y
-      "gsldrc1   %[ty], 0x0(%[src_y])           \n\t"
-      "gslwlc1   %[tu], 0x3(%[src_u])           \n\t"  // load 4 U
-      "gslwrc1   %[tu], 0x0(%[src_u])           \n\t"
-      "gslwlc1   %[tv], 0x3(%[src_v])           \n\t"  // load 4 V
-      "gslwrc1   %[tv], 0x0(%[src_v])           \n\t"
-      "punpcklbh %[vu], %[tu], %[tv]            \n\t"  // v3u3v2u2v1u1v0u0
-      "punpcklbh %[tu], %[ty], %[vu]            \n\t"  // interleave Y with UV (low half)
-      "gssdlc1   %[tu], 0x7(%[dst_frame])       \n\t"
-      "gssdrc1   %[tu], 0x0(%[dst_frame])       \n\t"
-      "punpckhbh %[tu], %[ty], %[vu]            \n\t"  // interleave Y with UV (high half)
- "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
- "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
- "daddiu %[src_y], %[src_y], 8 \n\t"
- "daddiu %[src_u], %[src_u], 4 \n\t"
- "daddiu %[src_v], %[src_v], 4 \n\t"
- "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
-      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
- [vu] "=&f"(vu)
- : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [dst_frame] "r"(dst_frame), [width] "r"(width)
- : "memory");
-}
-
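The two punpck steps above interleave 8 Y samples with 4 U and 4 V into 16 bytes of YUY2. A scalar sketch, assuming the standard Y0 U0 Y1 V0 packing (name hypothetical):

#include <stdint.h>

static void I422ToYUY2Row_Sketch(const uint8_t* src_y, const uint8_t* src_u,
                                 const uint8_t* src_v, uint8_t* dst_frame,
                                 int width) {
  for (int i = 0; i < width; i += 2) {  /* 2 pixels share one U/V sample */
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
}

I422ToUYVYRow below is identical except the punpck operand order is swapped, producing U0 Y0 V0 Y1.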
-void I422ToUYVYRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_frame,
- int width) {
- uint64_t temp[3];
- uint64_t vu = 0x0;
- __asm__ volatile(
- "1: \n\t"
-      "gsldlc1   %[ty], 0x7(%[src_y])           \n\t"  // load 8 Y
-      "gsldrc1   %[ty], 0x0(%[src_y])           \n\t"
-      "gslwlc1   %[tu], 0x3(%[src_u])           \n\t"  // load 4 U
-      "gslwrc1   %[tu], 0x0(%[src_u])           \n\t"
-      "gslwlc1   %[tv], 0x3(%[src_v])           \n\t"  // load 4 V
-      "gslwrc1   %[tv], 0x0(%[src_v])           \n\t"
-      "punpcklbh %[vu], %[tu], %[tv]            \n\t"  // v3u3v2u2v1u1v0u0
-      "punpcklbh %[tu], %[vu], %[ty]            \n\t"  // interleave UV with Y (low half)
-      "gssdlc1   %[tu], 0x7(%[dst_frame])       \n\t"
-      "gssdrc1   %[tu], 0x0(%[dst_frame])       \n\t"
-      "punpckhbh %[tu], %[vu], %[ty]            \n\t"  // interleave UV with Y (high half)
- "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
- "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
- "daddiu %[src_y], %[src_y], 8 \n\t"
- "daddiu %[src_u], %[src_u], 4 \n\t"
- "daddiu %[src_v], %[src_v], 4 \n\t"
- "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
- "daddiu %[width], %[width], -8 \n\t"
- "bgtz %[width], 1b \n\t"
- "nop \n\t"
-      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
- [vu] "=&f"(vu)
- : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
- [dst_frame] "r"(dst_frame), [width] "r"(width)
- : "memory");
-}
-
-void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, dest;
- const uint64_t mask0 = 0xff000000ff000000ULL;
- const uint64_t mask1 = ~mask0;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "and %[src], %[src], %[mask0] \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[src], %[dest] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
- uint8_t* dst_a,
- int width) {
- uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
- const uint64_t mask = 0xff000000ff000000ULL;
- const uint64_t shift = 0x18;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "and %[dest0], %[src], %[mask] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
- "and %[dest1], %[src], %[mask] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
-
- "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
- "and %[dest0], %[src], %[mask] \n\t"
- "psrlw %[dest0], %[dest0], %[shift] \n\t"
- "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
- "and %[dest1], %[src], %[mask] \n\t"
- "psrlw %[dest1], %[dest1], %[shift] \n\t"
- "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask),
- [shift] "f"(shift), [width] "r"(width)
- : "memory");
-}
-
-void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
- uint64_t source, dest0, dest1, dest;
- const uint64_t mask0 = 0x0;
- const uint64_t mask1 = 0x00ffffff00ffffffULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest0], %[mask0], %[src] \n\t"
- "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
- "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "punpckhbh %[dest0], %[mask0], %[src] \n\t"
- "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
- "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
- "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
- "and %[dest], %[dest], %[mask1] \n\t"
- "or %[dest], %[dest], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
- [mask1] "f"(mask1), [width] "r"(width)
- : "memory");
-}
-
-void I444ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-  __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
-    "or         %[ub],        %[ub],           %[mask]            \n\t"//must sign-extend
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
-    "or         %[vr],        %[vr],           %[mask]            \n\t"//sign-extend
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- "punpcklbh %[u], %[u], %[zero] \n\t"//u
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- "punpcklbh %[v], %[v], %[zero] \n\t"//v
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
-
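A hedged scalar sketch of the fixed-point conversion in I444ToARGBRow_MMI above. The coefficients are loaded from fixed offsets in struct YuvConstants (ub at 0x00, ug/vg at 0x20, vr at 0x40, bb/bg/br at 0x60/0x80/0xa0, yg at 0xc0), so the parameter names here are illustrative, not the library's API:

#include <stdint.h>

static uint8_t Clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One pixel: Y is expanded via (y*0x0101*yg)>>16 (the pmulhuh above), then
   the per-channel biases and U/V products are applied and the result is
   scaled down by >>6 before the saturating pack. */
static void YuvPixel_Sketch(int y, int u, int v, int ub, int ug, int vg,
                            int vr, int bb, int bg, int br, int yg,
                            uint8_t argb[4]) {
  int y1 = ((y * 0x0101) * yg) >> 16;
  argb[0] = Clamp255((y1 + bb - u * ub) >> 6);          /* B */
  argb[1] = Clamp255((y1 + bg - u * ug - v * vg) >> 6); /* G */
  argb[2] = Clamp255((y1 + br - v * vr) >> 6);          /* R */
  argb[3] = 255;                                        /* A */
}

I422ToARGBRow below uses the same math; the only difference is that each U and V sample is duplicated across two pixels before the multiplies.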
-// Also used for 420
-void I422ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
-  uint64_t y, u, v;
-  uint64_t b_vec[2], g_vec[2], r_vec[2];
-  uint64_t mask = 0xff00ff00ff00ff00ULL;
-  uint64_t ub, ug, vg, vr, bb, bg, br, yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
-      "or         %[ub],        %[ub],           %[mask]            \n\t"//must sign-extend
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
-      "or         %[vr],        %[vr],           %[mask]            \n\t"//sign-extend
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"//v
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
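
All of these MMI kernels compute the same fixed-point recipe: replicate the 8-bit Y into a 16-bit lane, keep the high half of an unsigned multiply by the Y gain, then add the per-channel bias, subtract the scaled chroma terms, and arithmetic-shift right by 6 (the psrah above) with saturating pack. A minimal scalar sketch of one pixel; operand names mirror the asm, and the exact coefficient values (which live in struct YuvConstants) are an assumption here:

#include <stdint.h>

static inline uint8_t Clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One pixel of the conversion the loop above computes 4-wide. */
static void YuvPixelToArgb(uint8_t y, uint8_t u, uint8_t v,
                           int ub, int ug, int vg, int vr, /* chroma gains */
                           int bb, int bg, int br, int yg, /* biases, Y gain */
                           uint8_t argb[4]) {
  /* punpcklbh y,y,y replicates Y; pmulhuh keeps the high 16 bits. */
  int y1 = (int)(((uint32_t)(y * 0x0101) * (uint32_t)yg) >> 16);
  argb[0] = Clamp255((y1 + bb - u * ub) >> 6);           /* B */
  argb[1] = Clamp255((y1 + bg - u * ug - v * vg) >> 6);  /* G */
  argb[2] = Clamp255((y1 + br - v * vr) >> 6);           /* R */
  argb[3] = 255;  /* A: the [alpha] operand is all ones */
}

The packushb at the end of the asm supplies the Clamp255 saturation.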
-
-// 10 bit YUV to ARGB
-void I210ToARGBRow_MMI(const uint16_t* src_y,
- const uint16_t* src_u,
- const uint16_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y,u,v;
- uint64_t b_vec[2],g_vec[2],r_vec[2];
- uint64_t mask = 0xff00ff00ff00ff00ULL;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "psllh %[y], %[y], %[six] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "punpcklhw %[u], %[u], %[u] \n\t"
- "psrah %[u], %[u], %[two] \n\t"
- "punpcklhw %[v], %[v], %[v] \n\t"
- "psrah %[v], %[v], %[two] \n\t"
- "pminsh %[u], %[u], %[mask1] \n\t"
- "pminsh %[v], %[v], %[mask1] \n\t"
-
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
-
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
-
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
- "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y),
- [u]"=&f"(u), [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [alpha]"f"(-1),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask), [two]"f"(0x02),
- [mask1]"f"(0x00ff00ff00ff00ff)
- : "memory"
- );
-}
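
The 10-bit variant only changes the front end: Y is shifted left by 6 so its 10 significant bits fill the top of the 16-bit lane before the same high multiply, and each 10-bit chroma sample is narrowed and clamped to 8 bits so the math above applies unchanged. A sketch of that normalization, assuming it mirrors the psllh/psrah/pminsh sequence:

#include <stdint.h>

/* Map 10-bit I210 samples onto the 8-bit fixed-point pipeline. */
static void Normalize10Bit(uint16_t y10, uint16_t u10, uint16_t v10, int yg,
                           int* y1, int* u8, int* v8) {
  *y1 = (int)(((uint32_t)(y10 << 6) * (uint32_t)yg) >> 16); /* psllh+pmulhuh */
  *u8 = u10 >> 2;              /* psrah by 2 */
  *v8 = v10 >> 2;
  if (*u8 > 255) *u8 = 255;    /* pminsh against 0x00ff per lane */
  if (*v8 > 255) *v8 = 255;
}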
-
-void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- const uint8_t* src_a,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y,u,v,a;
- uint64_t b_vec[2],g_vec[2],r_vec[2];
- uint64_t mask = 0xff00ff00ff00ff00ULL;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
- "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t"
- "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
- "packushb %[g_vec0], %[g_vec0], %[a] \n\t"
- "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v), [a]"=&f"(a),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [a_ptr]"r"(src_a), [zero]"f"(0x00),
- [six]"f"(0x6), [five]"f"(0x55),
- [mask]"f"(mask)
- : "memory"
- );
-}
-
-void I422ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y,u,v;
- uint64_t b_vec[2],g_vec[2],r_vec[2];
- uint64_t mask = 0xff00ff00ff00ff00ULL;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec0], %[y], %[bb] \n\t"
- "pmullh %[b_vec1], %[u], %[ub] \n\t"
- "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
- "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec0], %[y], %[bg] \n\t"
- "pmullh %[g_vec1], %[u], %[ug] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "pmullh %[g_vec1], %[v], %[vg] \n\t"
- "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
- "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
-
- "paddsh %[r_vec0], %[y], %[br] \n\t"
- "pmullh %[r_vec1], %[v], %[vr] \n\t"
- "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
- "psrah %[r_vec0], %[r_vec0], %[six] \n\t"
-
- "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
- "packushb %[g_vec0], %[g_vec0], %[zero] \n\t"
- "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
- "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
- "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
-
- "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t"
- "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t"
- "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t"
- "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t"
- "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t"
- "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
- "pextrh %[r_vec1], %[g_vec1], %[one] \n\t"
- "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t"
- "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t"
- "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t"
- "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
- [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
- [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask]"f"(mask),
- [lmove1]"f"(0x18), [rmove1]"f"(0x8),
- [one]"f"(0x1)
- : "memory"
- );
-}
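
The RGB24 kernel computes the same four ARGB pixels in registers, then spends the pextrh/pinsrh shuffle squeezing 16 bytes down to 12 by discarding every fourth (alpha) byte. The store step as a scalar sketch:

#include <stdint.h>

/* 4 ARGB pixels (16 bytes) -> 12 bytes of RGB24 by dropping alpha. */
static void StoreRgb24x4(const uint8_t argb[16], uint8_t rgb24[12]) {
  for (int i = 0; i < 4; ++i) {
    rgb24[3 * i + 0] = argb[4 * i + 0]; /* B */
    rgb24[3 * i + 1] = argb[4 * i + 1]; /* G */
    rgb24[3 * i + 2] = argb[4 * i + 2]; /* R */
  }
}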
-
-void I422ToARGB4444Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_argb4444,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
- "pmulhuh %[y], %[y], %[yg] \n\t"//y1
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"//u
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "and %[g_vec], %[g_vec], %[mask1] \n\t"
- "psrlw %[g_vec], %[g_vec], %[four] \n\t"
- "psrlw %[r_vec], %[g_vec], %[four] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
- "and %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "and %[b_vec], %[b_vec], %[mask1] \n\t"
- "psrlw %[b_vec], %[b_vec], %[four] \n\t"
- "psrlw %[r_vec], %[b_vec], %[four] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
- "and %[b_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
- [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
- [alpha]"f"(-1)
- : "memory"
- );
-}
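
ARGB4444 keeps only the high nibble of each channel (the 0xf0f0... mask1 AND above) and folds pairs of nibbles together. The per-pixel layout being assembled:

#include <stdint.h>

/* ARGB4444: 4 bits per channel; nibbles from high to low are A,R,G,B. */
static uint16_t PackArgb4444(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((a >> 4) << 12) | ((r >> 4) << 8) | ((g >> 4) << 4) |
                    (b >> 4));
}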
-
-void I422ToARGB1555Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_argb1555,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlw %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "or %[g_vec], %[g_vec], %[mask3] \n\t"
-
- "psrlw %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "psrlw %[temp], %[temp], %[eight] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "or %[b_vec], %[b_vec], %[mask3] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [mask3]"f"(0x800000008000),
- [lmove5]"f"(0x5)
- : "memory"
- );
-}
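
ARGB1555 packs a 1-bit alpha above three 5-bit channels; the 0x1f lanes of mask2 select the 5-bit fields and mask3 (0x8000 per pixel) forces alpha on. The layout:

#include <stdint.h>

/* ARGB1555: bit 15 = A, bits 14..10 = R, 9..5 = G, 4..0 = B. */
static uint16_t PackArgb1555(uint8_t a, uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((a >> 7) << 15) | ((r >> 3) << 10) | ((g >> 3) << 5) |
                    (b >> 3));
}

The kernel never reads an alpha source, so mask3 hardwires A = 1.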
-
-void I422ToRGB565Row_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* dst_rgb565,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- //u3|u2|u1|u0 --> u1|u1|u0|u0
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- //v3|v2|v1|v0 --> v1|v1|v0|v0
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
-           "psrlw              %[temp],         %[temp],           %[seven]          \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
- "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [seven]"f"(0x7),
- [lmove5]"f"(0x5)
- : "memory"
- );
-}
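
RGB565 drops alpha entirely and gives the spare bit to green (5-6-5). The shift-and-mask sequence above assembles two such pixels per word; the per-pixel formula is the standard one:

#include <stdint.h>

/* RGB565: bits 15..11 = R, 10..5 = G, 4..0 = B; no alpha. */
static uint16_t PackRgb565(uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}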
-
-void NV12ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1)
- : "memory"
- );
-}
-
-void NV21ToARGBRow_MMI(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[ushu] \n\t"
- "pshufh %[u], %[u], %[vshu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1)
- : "memory"
- );
-}
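
NV12 and NV21 share one Y plane plus a single interleaved chroma plane; the two kernels above differ only in which pshufh broadcast feeds U and which feeds V (the selector 0xA0 picks the even 16-bit lanes, 0xf5 the odd ones). The scalar equivalent of that selection:

#include <stdint.h>

/* Biplanar chroma: NV12 stores U,V,U,V...; NV21 stores V,U,V,U... */
static void LoadBiplanarUv(const uint8_t* uv, int is_nv21,
                           uint8_t* u, uint8_t* v) {
  *u = uv[is_nv21 ? 1 : 0];
  *v = uv[is_nv21 ? 0 : 1];
}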
-
-void NV12ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
- "psllw %[temp], %[r_vec], %[lmove1] \n\t"
- "or %[g_vec], %[g_vec], %[temp] \n\t"
- "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
- "pextrh %[temp], %[temp], %[zero] \n\t"
- "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[zero] \n\t"
- "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[one] \n\t"
- "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
- "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
- "or %[b_vec], %[b_vec], %[temp] \n\t"
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [lmove1]"f"(0x18),
- [one]"f"(0x1), [rmove1]"f"(0x8)
- : "memory"
- );
-}
-
-void NV21ToRGB24Row_MMI(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[ushu] \n\t"
- "pshufh %[u], %[u], %[vshu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
- "psllw %[temp], %[r_vec], %[lmove1] \n\t"
- "or %[g_vec], %[g_vec], %[temp] \n\t"
- "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
- "pextrh %[temp], %[temp], %[zero] \n\t"
- "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[zero] \n\t"
- "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
- "pextrh %[temp], %[b_vec], %[one] \n\t"
- "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
- "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
- "or %[b_vec], %[b_vec], %[temp] \n\t"
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
- "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
- [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [lmove1]"f"(0x18), [rmove1]"f"(0x8),
- [one]"f"(0x1)
- : "memory"
- );
-}
-
-void NV12ToRGB565Row_MMI(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_rgb565,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "pshufh %[v], %[u], %[vshu] \n\t"
- "pshufh %[u], %[u], %[ushu] \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[g_vec], %[three] \n\t"
- "and %[g_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
-           "psubb              %[y],            %[eight],          %[three]          \n\t"// 8 - 3 = 5
- "psllw %[r_vec], %[r_vec], %[y] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[g_vec], %[g_vec], %[r_vec] \n\t"
-
- "psrlh %[temp], %[b_vec], %[three] \n\t"
- "and %[b_vec], %[temp], %[mask2] \n\t"
- "psrlw %[temp], %[temp], %[seven] \n\t"
- "psrlw %[r_vec], %[mask1], %[eight] \n\t"
- "and %[r_vec], %[temp], %[r_vec] \n\t"
-           "psubb              %[y],            %[eight],          %[three]          \n\t"// 8 - 3 = 5
- "psllw %[r_vec], %[r_vec], %[y] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
- "paddb %[r_vec], %[three], %[six] \n\t"
- "psrlw %[temp], %[temp], %[r_vec] \n\t"
- "and %[r_vec], %[temp], %[mask2] \n\t"
- "paddb %[temp], %[three], %[eight] \n\t"
- "psllw %[r_vec], %[r_vec], %[temp] \n\t"
- "or %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
- "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
- "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
- "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
- [dst_rgb565]"r"(dst_rgb565),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
- [eight]"f"(0x8), [seven]"f"(0x7)
- : "memory"
- );
-}
-
-void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t"
- "psrlh %[temp], %[y], %[eight] \n\t"
- "pshufh %[u], %[temp], %[ushu] \n\t"
- "pshufh %[v], %[temp], %[vshu] \n\t"
-
- "psrlh %[temp], %[mask1], %[eight] \n\t"
- "and %[y], %[y], %[temp] \n\t"
- "psllh %[temp], %[y], %[eight] \n\t"
- "or %[y], %[y], %[temp] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [eight]"f"(0x8)
- : "memory"
- );
-}
-
-void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t"
- "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t"
- "psrlh %[temp], %[mask1], %[eight] \n\t"
- "and %[temp], %[y], %[temp] \n\t"
- "pshufh %[u], %[temp], %[ushu] \n\t"
- "pshufh %[v], %[temp], %[vshu] \n\t"
-
- "psrlh %[y], %[y], %[eight] \n\t"
- "psllh %[temp], %[y], %[eight] \n\t"
- "or %[y], %[y], %[temp] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
- "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
- "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [ushu]"f"(0xA0), [vshu]"f"(0xf5),
- [alpha]"f"(-1), [eight]"f"(0x8)
- : "memory"
- );
-}
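
YUY2 and UYVY carry the same packed 4:2:2 data with the byte roles swapped: YUY2 keeps luma in the even bytes, so the kernel isolates chroma with the psrlh by 8, while UYVY keeps luma in the odd bytes and takes chroma through the 0x00ff mask instead. Unpacking one two-pixel group, sketched:

#include <stdint.h>

/* Packed 4:2:2: YUY2 = Y0,U,Y1,V; UYVY = U,Y0,V,Y1. */
static void Unpack422Pair(const uint8_t p[4], int is_uyvy, uint8_t* y0,
                          uint8_t* y1, uint8_t* u, uint8_t* v) {
  if (is_uyvy) {
    *u = p[0]; *y0 = p[1]; *v = p[2]; *y1 = p[3];
  } else {
    *y0 = p[0]; *u = p[1]; *y1 = p[2]; *v = p[3];
  }
}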
-
-void I422ToRGBARow_MMI(const uint8_t* src_y,
- const uint8_t* src_u,
- const uint8_t* src_v,
- uint8_t* rgb_buf,
- const struct YuvConstants* yuvconstants,
- int width) {
- uint64_t y, u, v;
- uint64_t b_vec, g_vec, r_vec, temp;
- uint64_t ub,ug,vg,vr,bb,bg,br,yg;
-
- __asm__ volatile(
- "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
- "or %[ub], %[ub], %[mask1] \n\t"
- "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
- "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[ug], %[ug], %[zero] \n\t"
- "pshufh %[ug], %[ug], %[zero] \n\t"
- "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vg], %[vg], %[zero] \n\t"
- "pshufh %[vg], %[vg], %[five] \n\t"
- "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
- "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
- "punpcklbh %[vr], %[vr], %[zero] \n\t"
- "pshufh %[vr], %[vr], %[five] \n\t"
- "or %[vr], %[vr], %[mask1] \n\t"
-
- "1: \n\t"
- "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
- "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
- "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
- "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
- "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
- "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
-
- "punpcklbh %[y], %[y], %[y] \n\t"
- "pmulhuh %[y], %[y], %[yg] \n\t"
-
- "punpcklbh %[u], %[u], %[u] \n\t"
- "punpcklbh %[u], %[u], %[zero] \n\t"
- "paddsh %[b_vec], %[y], %[bb] \n\t"
- "pmullh %[temp], %[u], %[ub] \n\t"
- "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
- "psrah %[b_vec], %[b_vec], %[six] \n\t"
-
- "punpcklbh %[v], %[v], %[v] \n\t"
- "punpcklbh %[v], %[v], %[zero] \n\t"
- "paddsh %[g_vec], %[y], %[bg] \n\t"
- "pmullh %[temp], %[u], %[ug] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "pmullh %[temp], %[v], %[vg] \n\t"
- "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
- "psrah %[g_vec], %[g_vec], %[six] \n\t"
-
- "paddsh %[r_vec], %[y], %[br] \n\t"
- "pmullh %[temp], %[v], %[vr] \n\t"
- "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
- "psrah %[r_vec], %[r_vec], %[six] \n\t"
-
- "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
- "packushb %[g_vec], %[g_vec], %[zero] \n\t"
- "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t"
- "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t"
- "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t"
- "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
- "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
-
- "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
- "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
- "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
-
- "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
- "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
- "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
- "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
-
- : [y]"=&f"(y), [u]"=&f"(u),
- [v]"=&f"(v),
- [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
- [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
- [ub]"=&f"(ub), [ug]"=&f"(ug),
- [vg]"=&f"(vg), [vr]"=&f"(vr),
- [bb]"=&f"(bb), [bg]"=&f"(bg),
- [br]"=&f"(br), [yg]"=&f"(yg)
- : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
- [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
- [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
- [zero]"f"(0x00), [five]"f"(0x55),
- [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
- [alpha]"f"(-1)
- : "memory"
- );
-}
-
-void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
- __asm__ volatile (
- "punpcklwd %[v32], %[v32], %[v32] \n\t"
- "1: \n\t"
- "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t"
-
- "daddi %[width], %[width], -0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "bnez %[width], 1b \n\t"
- : [v32]"+&f"(v32)
- : [dst_ptr]"r"(dst_argb), [width]"r"(width)
- : "memory"
- );
-}
-// clang-format on
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/row_neon.cc b/files/source/row_neon.cc
index 804ff839..4ed13638 100644
--- a/files/source/row_neon.cc
+++ b/files/source/row_neon.cc
@@ -89,12 +89,14 @@ extern "C" {
"vsli.u16 d2, d2, #8 \n" \
"vsri.u16 d3, d3, #8 \n"
+// TODO: Use a single register for kUVCoeff and multiply by lane.
#define YUVTORGB_SETUP \
+ "vld1.16 {d31}, [%[kRGBCoeffBias]] \n" \
"vld4.8 {d26[], d27[], d28[], d29[]}, [%[kUVCoeff]] \n" \
- "vld1.16 {d31[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d20[], d21[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d22[], d23[]}, [%[kRGBCoeffBias]]! \n" \
- "vld1.16 {d24[], d25[]}, [%[kRGBCoeffBias]] \n"
+ "vdup.u16 q10, d31[1] \n" \
+ "vdup.u16 q11, d31[2] \n" \
+ "vdup.u16 q12, d31[3] \n" \
+ "vdup.u16 d31, d31[0] \n"
// q0: B uint16x8_t
// q1: G uint16x8_t
@@ -156,6 +158,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
: "cc", "memory", YUVTORGB_REGS, "d6");
}
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV444 YUVTORGB
+ RGBTORGB8
+ "subs %[width], %[width], #8 \n"
+ "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
+ "bgt 1b \n"
+ : [src_y] "+r"(src_y), // %[src_y]
+ [src_u] "+r"(src_u), // %[src_u]
+ [src_v] "+r"(src_v), // %[src_v]
+      [dst_rgb24] "+r"(dst_rgb24),                 // %[dst_rgb24]
+ [width] "+r"(width) // %[width]
+ : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
+ [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
+ : "cc", "memory", YUVTORGB_REGS);
+}
+
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -586,10 +611,10 @@ void DetileRow_NEON(const uint8_t* src,
int width) {
asm volatile(
"1: \n"
- "vld1.16 {q0}, [%0], %3 \n" // load 16 bytes
+ "vld1.8 {q0}, [%0], %3 \n" // load 16 bytes
"subs %2, %2, #16 \n" // 16 processed per loop
- "pld [%0, 1792] \n"
- "vst1.16 {q0}, [%1]! \n" // store 16 bytes
+ "pld [%0, #1792] \n"
+ "vst1.8 {q0}, [%1]! \n" // store 16 bytes
"bgt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
@@ -599,6 +624,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 16-bit Y values from the tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.16 {q0, q1}, [%0], %3 \n" // load 16 pixels
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "pld [%0, #3584] \n"
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3
+ : "cc", "memory", "q0", "q1" // Clobber List
+ );
+}
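
Tiled formats store the image as narrow vertical tiles, so one destination scanline is gathered by copying 16 pixels from each tile in turn and hopping ahead by the tile stride; note the NEON version passes src_tile_stride * 2 because its post-increment counts bytes. A plain-C sketch of the 16-bit variant:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Gather one row of a detiled image from 16-pixel-wide tiles. */
static void DetileRow16(const uint16_t* src, ptrdiff_t src_tile_stride,
                        uint16_t* dst, int width) {
  for (int x = 0; x < width; x += 16) {
    memcpy(dst + x, src, 16 * sizeof(uint16_t));
    src += src_tile_stride; /* same row of the next tile */
  }
}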
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
@@ -609,7 +654,7 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
"1: \n"
"vld2.8 {d0, d1}, [%0], %4 \n"
"subs %3, %3, #16 \n"
- "pld [%0, 1792] \n"
+ "pld [%0, #1792] \n"
"vst1.8 {d0}, [%1]! \n"
"vst1.8 {d1}, [%2]! \n"
"bgt 1b \n"
@@ -622,6 +667,101 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
);
}
+#if LIBYUV_USE_ST2
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
+ "pld [%0, #1792] \n"
+ "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
+ "pld [%1, #1792] \n"
+ "subs %3, %3, #16 \n"
+ "vst2.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber list
+ );
+}
+#else
+// Read 16 Y, 8 UV, and write 8 YUYV.
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q0}, [%0], %4 \n" // Load 16 Y
+ "vld1.8 {q1}, [%1], %5 \n" // Load 8 UV
+ "subs %3, %3, #16 \n"
+ "pld [%0, #1792] \n"
+ "vzip.8 q0, q1 \n"
+ "pld [%1, #1792] \n"
+ "vst1.8 {q0, q1}, [%2]! \n"
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber list
+ );
+}
+#endif
+
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q14}, [%0]! \n" // Load lower bits.
+ "vld1.8 {q9}, [%0]! \n" // Load upper bits row
+ // by row.
+ "vld1.8 {q11}, [%0]! \n"
+ "vld1.8 {q13}, [%0]! \n"
+ "vld1.8 {q15}, [%0]! \n"
+ "vshl.u8 q8, q14, #6 \n" // Shift lower bit data
+ // appropriately.
+ "vshl.u8 q10, q14, #4 \n"
+ "vshl.u8 q12, q14, #2 \n"
+ "vzip.u8 q8, q9 \n" // Interleave upper and
+ // lower bits.
+ "vzip.u8 q10, q11 \n"
+ "vzip.u8 q12, q13 \n"
+ "vzip.u8 q14, q15 \n"
+ "vsri.u16 q8, q8, #10 \n" // Copy upper 6 bits
+ // into lower 6 bits for
+ // better accuracy in
+ // conversions.
+ "vsri.u16 q9, q9, #10 \n"
+ "vsri.u16 q10, q10, #10 \n"
+ "vsri.u16 q11, q11, #10 \n"
+ "vsri.u16 q12, q12, #10 \n"
+ "vsri.u16 q13, q13, #10 \n"
+ "vsri.u16 q14, q14, #10 \n"
+ "vsri.u16 q15, q15, #10 \n"
+ "vstmia %1!, {q8-q15} \n" // Store pixel block (64
+ // pixels).
+ "subs %2, %2, #80 \n"
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(size) // %2
+ :
+ : "cc", "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
+}
+
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@@ -664,7 +804,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(dst_b), // %3
"+r"(width) // %4
: // Input registers
- : "cc", "memory", "d0", "d1", "d2" // Clobber List
+ : "cc", "memory", "q0", "q1", "q2" // Clobber List
);
}
@@ -1505,6 +1645,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
);
}
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // stride + src_yuy2
+ "1: \n"
+ "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
+ "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
+ "vld2.8 {q2, q3}, [%1]! \n" // load next row YUY2.
+ "vrhadd.u8 q4, q1, q3 \n" // average rows of UV
+ "vst1.8 {q4}, [%2]! \n" // store 8 UV.
+ "bgt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(stride_yuy2), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
+ "d7" // Clobber List
+ );
+}
+
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
@@ -1590,7 +1753,7 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
"vdup.32 d7, %2 \n" // dither4
@@ -1762,7 +1925,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
-// TODO(fbarchard): Subsample match C code.
+// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -1808,6 +1971,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width) {
+ asm volatile (
+      "add        %1, %0, %1                    \n"  // src_stride + src_abgr
+ "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
+ "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
+ "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
+ "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
+ "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
+ "vmov.u16 q15, #0x8080 \n" // 128.5
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
+ "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
+ "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
+ "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
+ "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
+ "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
+
+ "vrshr.u16 q0, q0, #1 \n" // 2x average
+ "vrshr.u16 q1, q1, #1 \n"
+ "vrshr.u16 q2, q2, #1 \n"
+
+ "subs %4, %4, #16 \n" // 16 processed per loop.
+ RGBTOUV(q2, q1, q0)
+ "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
+ "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
+ "bgt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_stride_abgr), // %1
+ "+r"(dst_uj), // %2
+ "+r"(dst_vj), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+ );
+}
+
// TODO(fbarchard): Subsample match C code.
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
@@ -2567,6 +2775,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
@@ -3633,7 +3845,7 @@ void SplitUVRow_16_NEON(const uint16_t* src_uv,
"+r"(dst_v), // %2
"+r"(width) // %3
: "r"(shift) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+ : "cc", "memory", "q0", "q1", "q2");
}
void MergeUVRow_16_NEON(const uint16_t* src_u,
@@ -3687,31 +3899,25 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
- "vdup.16 q0, %3 \n"
- "1: \n"
- "vld1.16 {q1}, [%0]! \n"
- "vld1.16 {q2}, [%0]! \n"
- "vmovl.u16 q3, d2 \n"
- "vmovl.u16 q1, d3 \n"
- "vmovl.u16 q4, d4 \n"
- "vmovl.u16 q2, d5 \n"
- "vshl.u32 q3, q3, q0 \n"
- "vshl.u32 q4, q4, q0 \n"
- "vshl.u32 q1, q1, q0 \n"
- "vshl.u32 q2, q2, q0 \n"
- "vmovn.u32 d2, q3 \n"
- "vmovn.u32 d3, q1 \n"
- "vmovn.u32 d4, q4 \n"
- "vmovn.u32 d5, q2 \n"
- "vst1.16 {q1}, [%1]! \n"
- "vst1.16 {q2}, [%1]! \n"
+ "vdup.16 d8, %3 \n"
+ "1: \n"
+ "vld1.16 {q2, q3}, [%0]! \n"
+ "vmull.u16 q0, d4, d8 \n"
+ "vmull.u16 q1, d5, d8 \n"
+ "vmull.u16 q2, d6, d8 \n"
+ "vmull.u16 q3, d7, d8 \n"
+ "vshrn.u32 d0, q0, #16 \n"
+ "vshrn.u32 d1, q1, #16 \n"
+ "vshrn.u32 d2, q2, #16 \n"
+ "vshrn.u32 d3, q3, #16 \n"
+ "vst1.16 {q0, q1}, [%1]! \n" // store 16 pixels
"subs %2, %2, #16 \n" // 16 src pixels per loop
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(scale) // %3
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
+ : "cc", "memory", "q0", "q1", "q2", "q3", "d8");
}
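For reference, each lane of the rewritten loop above now computes the top 16 bits of an unsigned 16x16->32 multiply; a scalar sketch of that (the name here is illustrative, not from the patch):

  // Each output is the top 16 bits of an unsigned 16x16->32 multiply.
  static inline uint16_t DivideRow16Lane(uint16_t y, uint16_t scale) {
    return (uint16_t)(((uint32_t)y * scale) >> 16);
  }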
// Use scale to convert lsb formats to msb, depending how many bits there are:
diff --git a/files/source/row_neon64.cc b/files/source/row_neon64.cc
index 0f120373..74190d61 100644
--- a/files/source/row_neon64.cc
+++ b/files/source/row_neon64.cc
@@ -142,6 +142,29 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
: "cc", "memory", YUVTORGB_REGS, "v19");
}
+void I444ToRGB24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ asm volatile(
+ YUVTORGB_SETUP
+ "1: \n" READYUV444 YUVTORGB
+ RGBTORGB8
+ "subs %w[width], %w[width], #8 \n"
+ "st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
+ "b.gt 1b \n"
+ : [src_y] "+r"(src_y), // %[src_y]
+ [src_u] "+r"(src_u), // %[src_u]
+ [src_v] "+r"(src_v), // %[src_v]
+ [dst_rgb24] "+r"(dst_rgb24), // %[dst_rgb24]
+ [width] "+r"(width) // %[width]
+ : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff]
+ [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias]
+ : "cc", "memory", YUVTORGB_REGS);
+}
+
void I422ToARGBRow_NEON(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -627,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src,
);
}
+// Reads 16 Y's of 16 bits each from the tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+ ptrdiff_t src_tile_stride,
+ uint16_t* dst,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.8h,v1.8h}, [%0], %3 \n" // load 16 pixels
+ "subs %w2, %w2, #16 \n" // 16 processed per loop
+ "prfm pldl1keep, [%0, 3584] \n" // 7 tiles of 512b ahead
+ "st1 {v0.8h,v1.8h}, [%1], #32 \n" // store 16 pixels
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "r"(src_tile_stride * 2) // %3
+ : "cc", "memory", "v0", "v1" // Clobber List
+ );
+}
+
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
ptrdiff_t src_tile_stride,
@@ -651,6 +694,100 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
}
#if LIBYUV_USE_ST2
+// Read 16 Y, 8 UV, and write 8 YUY2
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
+ "prfm pldl1keep, [%0, 1792] \n"
+ "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
+ "prfm pldl1keep, [%1, 1792] \n"
+      "subs         %w3, %w3, #16              \n"
+      "st2          {v0.16b,v1.16b}, [%2], #32 \n"  // store 8 YUY2
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "v0", "v1" // Clobber list
+ );
+}
+#else
+// Read 16 Y, 8 UV, and write 8 YUY2
+void DetileToYUY2_NEON(const uint8_t* src_y,
+ ptrdiff_t src_y_tile_stride,
+ const uint8_t* src_uv,
+ ptrdiff_t src_uv_tile_stride,
+ uint8_t* dst_yuy2,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
+ "ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
+ "subs %w3, %w3, #16 \n"
+ "prfm pldl1keep, [%0, 1792] \n"
+ "zip1 v2.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%1, 1792] \n"
+ "zip2 v3.16b, v0.16b, v1.16b \n"
+ "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_uv), // %1
+ "+r"(dst_yuy2), // %2
+ "+r"(width) // %3
+ : "r"(src_y_tile_stride), // %4
+ "r"(src_uv_tile_stride) // %5
+ : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list
+ );
+}
+#endif
+
+// Unpack MT2T into tiled P010 64 pixels at a time. See
+// tinyurl.com/mtk-10bit-video-format for format documentation.
+void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v7.16b}, [%0], #16 \n"
+ "ld1 {v0.16b-v3.16b}, [%0], #64 \n"
+ "shl v4.16b, v7.16b, #6 \n"
+ "shl v5.16b, v7.16b, #4 \n"
+ "shl v6.16b, v7.16b, #2 \n"
+ "subs %2, %2, #80 \n"
+ "zip1 v16.16b, v4.16b, v0.16b \n"
+ "zip1 v18.16b, v5.16b, v1.16b \n"
+ "zip1 v20.16b, v6.16b, v2.16b \n"
+ "zip1 v22.16b, v7.16b, v3.16b \n"
+ "zip2 v17.16b, v4.16b, v0.16b \n"
+ "zip2 v19.16b, v5.16b, v1.16b \n"
+ "zip2 v21.16b, v6.16b, v2.16b \n"
+ "zip2 v23.16b, v7.16b, v3.16b \n"
+ "sri v16.8h, v16.8h, #10 \n"
+ "sri v17.8h, v17.8h, #10 \n"
+ "sri v18.8h, v18.8h, #10 \n"
+ "sri v19.8h, v19.8h, #10 \n"
+ "st1 {v16.8h-v19.8h}, [%1], #64 \n"
+ "sri v20.8h, v20.8h, #10 \n"
+ "sri v21.8h, v21.8h, #10 \n"
+ "sri v22.8h, v22.8h, #10 \n"
+ "sri v23.8h, v23.8h, #10 \n"
+ "st1 {v20.8h-v23.8h}, [%1], #64 \n"
+ "b.gt 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(size) // %2
+ :
+ : "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+}
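The loads above assume an 80-byte block of 16 packed 2-bit LSB bytes followed by 64 MSB bytes; the shifts by 6/4/2 align each 2-bit field, the zips pair it with its MSB byte into a left-aligned 10-bit lane, and the sri replicates the top 6 bits into the low bits. A scalar model of one block, inferred from the assembly (function name is illustrative):

  // One 80-byte MT2T block -> 64 MSB-aligned 16-bit samples.
  static void UnpackMT2TBlock(const uint8_t* src, uint16_t* dst) {
    const uint8_t* lo = src;       // 16 bytes, four 2-bit fields per byte
    const uint8_t* hi = src + 16;  // 64 bytes holding the upper 8 bits
    int i;
    for (i = 0; i < 64; ++i) {
      uint16_t low2 = (lo[i % 16] >> ((i / 16) * 2)) & 3;
      uint16_t v10 = (uint16_t)((hi[i] << 2) | low2);  // 10-bit sample
      dst[i] = (uint16_t)((v10 << 6) | (v10 >> 4));    // align + replicate
    }
  }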
+
+#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@@ -1729,6 +1866,29 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
);
}
+void YUY2ToNVUVRow_NEON(const uint8_t* src_yuy2,
+ int stride_yuy2,
+ uint8_t* dst_uv,
+ int width) {
+ const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels
+ "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
+ "ld2 {v2.16b,v3.16b}, [%1], #32 \n" // load next row
+ "urhadd v4.16b, v1.16b, v3.16b \n" // average rows of UV
+ "prfm pldl1keep, [%0, 448] \n"
+ "st1 {v4.16b}, [%2], #16 \n" // store 8 UV.
+ "b.gt 1b \n"
+ : "+r"(src_yuy2), // %0
+ "+r"(src_yuy2b), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
+ );
+}
+
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
uint8_t* dst_argb,
@@ -1819,24 +1979,23 @@ void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
asm volatile(
- "dup v1.4s, %w2 \n" // dither4
+ "dup v1.4s, %w3 \n" // dither4
"1: \n"
- "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // load 8
- // pixels
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
+ "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
"uqadd v16.8b, v16.8b, v1.8b \n"
"prfm pldl1keep, [%0, 448] \n"
"uqadd v17.8b, v17.8b, v1.8b \n"
"uqadd v18.8b, v18.8b, v1.8b \n" ARGBTORGB565
- "st1 {v18.16b}, [%0], #16 \n" // store 8 pixels RGB565.
+ "st1 {v18.16b}, [%1], #16 \n" // store 8 pixels RGB565.
"b.gt 1b \n"
- : "+r"(dst_rgb) // %0
- : "r"(src_argb), // %1
- "r"(dither4), // %2
- "r"(width) // %3
+ : "+r"(src_argb), // %0
+ "+r"(dst_rgb), // %1
+ "+r"(width) // %2
+ : "r"(dither4) // %3
: "cc", "memory", "v1", "v16", "v17", "v18", "v19");
}
@@ -2144,6 +2303,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
+// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@@ -2189,6 +2349,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
+void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_uj,
+ uint8_t* dst_vj,
+ int width) {
+ const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
+ asm volatile (
+ "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
+ "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
+ "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
+ "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
+ "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
+ "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
+ "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
+ "prfm pldl1keep, [%0, 448] \n"
+ "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
+ "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
+ "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
+ "prfm pldl1keep, [%1, 448] \n"
+ "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
+ "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
+
+ "urshr v0.8h, v0.8h, #1 \n" // 2x average
+ "urshr v1.8h, v1.8h, #1 \n"
+ "urshr v2.8h, v2.8h, #1 \n"
+
+ "subs %w4, %w4, #16 \n" // 16 processed per loop.
+ RGBTOUV(v2.8h, v1.8h, v0.8h)
+ "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
+ "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
+ "b.gt 1b \n"
+ : "+r"(src_abgr), // %0
+ "+r"(src_abgr_1), // %1
+ "+r"(dst_uj), // %2
+ "+r"(dst_vj), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v20", "v21", "v22", "v23", "v24", "v25"
+ );
+}
+
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@@ -2812,6 +3017,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
+void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
@@ -4241,23 +4450,19 @@ void DivideRow_16_NEON(const uint16_t* src_y,
int scale,
int width) {
asm volatile(
- "dup v0.8h, %w3 \n"
+ "dup v4.8h, %w3 \n"
"1: \n"
- "ldp q1, q2, [%0], #32 \n"
- "ushll v3.4s, v1.4h, #0 \n"
- "ushll v4.4s, v2.4h, #0 \n"
+ "ldp q2, q3, [%0], #32 \n"
+ "umull v0.4s, v2.4h, v4.4h \n"
+ "umull2 v1.4s, v2.8h, v4.8h \n"
+ "umull v2.4s, v3.4h, v4.4h \n"
+ "umull2 v3.4s, v3.8h, v4.8h \n"
"prfm pldl1keep, [%0, 448] \n"
- "ushll2 v1.4s, v1.8h, #0 \n"
- "ushll2 v2.4s, v2.8h, #0 \n"
- "mul v3.4s, v0.4s, v3.4s \n"
- "mul v4.4s, v0.4s, v4.4s \n"
- "mul v1.4s, v0.4s, v1.4s \n"
- "mul v2.4s, v0.4s, v2.4s \n"
- "shrn v3.4h, v3.4s, #16 \n"
- "shrn v4.4h, v4.4s, #16 \n"
- "shrn2 v3.8h, v1.4s, #16 \n"
- "shrn2 v4.8h, v2.4s, #16 \n"
- "stp q3, q3, [%1], #32 \n" // store 16 pixels
+ "shrn v0.4h, v0.4s, #16 \n"
+ "shrn2 v0.8h, v1.4s, #16 \n"
+ "shrn v1.4h, v2.4s, #16 \n"
+ "shrn2 v1.8h, v3.4s, #16 \n"
+ "stp q0, q1, [%1], #32 \n" // store 16 pixels
"subs %w2, %w2, #16 \n" // 16 src pixels per loop
"b.gt 1b \n"
: "+r"(src_y), // %0
diff --git a/files/source/row_rvv.cc b/files/source/row_rvv.cc
new file mode 100644
index 00000000..27e91a3b
--- /dev/null
+++ b/files/source/row_rvv.cc
@@ -0,0 +1,956 @@
+/*
+ * Copyright 2023 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Copyright (c) 2023 SiFive, Inc. All rights reserved.
+ *
+ * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
+ * Contributed by Bruce Lai <bruce.lai@sifive.com>
+ */
+
+#include "libyuv/row.h"
+
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
+#include <assert.h>
+#include <riscv_vector.h>
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Fill YUV -> RGB conversion constants into vectors
+// NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
+// register) is set to round-to-nearest-up mode(0).
+#define YUVTORGB_SETUP(vl, yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
+ { \
+ asm volatile("csrwi vxrm, 0"); \
+ ub = yuvconst->kUVCoeff[0]; \
+ vr = yuvconst->kUVCoeff[1]; \
+ ug = yuvconst->kUVCoeff[2]; \
+ vg = yuvconst->kUVCoeff[3]; \
+ yg = yuvconst->kRGBCoeffBias[0]; \
+ bb = yuvconst->kRGBCoeffBias[1] + 32; \
+ bg = yuvconst->kRGBCoeffBias[2] - 32; \
+ br = yuvconst->kRGBCoeffBias[3] + 32; \
+ }
+
+// Read [VLEN/8] Y, [VLEN/(8 * 2)] U and [VLEN/(8 * 2)] V from 422
+#define READYUV422(vl, v_u, v_v, v_y_16) \
+ { \
+ vuint8m1_t v_tmp0, v_tmp1; \
+ vuint8m2_t v_y; \
+ vuint16m2_t v_u_16, v_v_16; \
+ vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
+ v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \
+ v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
+ v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \
+ v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
+ v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
+ v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
+ v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
+ v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
+// Read [VLEN/8] Y, [VLEN/8] U, and [VLEN/8] V from 444
+#define READYUV444(vl, v_u, v_v, v_y_16) \
+ { \
+ vuint8m2_t v_y; \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_u = __riscv_vle8_v_u8m2(src_u, vl); \
+ v_v = __riscv_vle8_v_u8m2(src_v, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+ }
+
+// Convert from YUV to fixed point RGB
+#define YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16, \
+ v_b_16, v_r_16) \
+ { \
+ vuint16m4_t v_tmp0, v_tmp1, v_tmp2, v_tmp3, v_tmp4; \
+ vuint32m8_t v_tmp5; \
+ v_tmp0 = __riscv_vwmulu_vx_u16m4(v_u, ug, vl); \
+ v_y_16 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); \
+ v_tmp0 = __riscv_vwmaccu_vx_u16m4(v_tmp0, vg, v_v, vl); \
+ v_tmp1 = __riscv_vwmulu_vx_u16m4(v_u, ub, vl); \
+ v_tmp5 = __riscv_vwmulu_vx_u32m8(v_y_16, yg, vl); \
+ v_tmp2 = __riscv_vnsrl_wx_u16m4(v_tmp5, 16, vl); \
+ v_tmp3 = __riscv_vadd_vx_u16m4(v_tmp2, bg, vl); \
+ v_tmp4 = __riscv_vadd_vv_u16m4(v_tmp2, v_tmp1, vl); \
+ v_tmp2 = __riscv_vwmaccu_vx_u16m4(v_tmp2, vr, v_v, vl); \
+ v_g_16 = __riscv_vssubu_vv_u16m4(v_tmp3, v_tmp0, vl); \
+ v_b_16 = __riscv_vssubu_vx_u16m4(v_tmp4, bb, vl); \
+ v_r_16 = __riscv_vssubu_vx_u16m4(v_tmp2, br, vl); \
+ }
+
+// Convert from fixed point RGB To 8 bit RGB
+#define RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r) \
+ { \
+ v_g = __riscv_vnclipu_wx_u8m2(v_g_16, 6, vl); \
+ v_b = __riscv_vnclipu_wx_u8m2(v_b_16, 6, vl); \
+ v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl); \
+ }
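Together with READYUV422/READYUV444 above (note the multiply by 0x0101, which replicates a byte across a 16-bit lane and serves both to 2x-upsample chroma and to widen Y), YUVTORGB and RGBTORGB8 form a fixed-point pipeline. A one-pixel scalar model, simplified from the macros (wraparound of the 16-bit vector adds is approximated with 32-bit arithmetic here):

  static inline uint32_t SSubU(uint32_t a, uint32_t b) {  // like vssubu
    return a > b ? a - b : 0;
  }
  static inline uint8_t NClipU6(uint32_t v) {  // like vnclipu by 6 with rnu
    v = (v + 32) >> 6;
    return v > 255 ? 255 : (uint8_t)v;
  }
  // ub..br as prepared by YUVTORGB_SETUP.
  static void YuvPixel(uint8_t y, uint8_t u, uint8_t v, uint8_t ub,
                       uint8_t vr, uint8_t ug, uint8_t vg, uint16_t yg,
                       uint16_t bb, uint16_t bg, uint16_t br, uint8_t* b,
                       uint8_t* g, uint8_t* r) {
    uint32_t y16 = ((uint32_t)(y * 0x0101) * yg) >> 16;  // Y contribution
    *b = NClipU6(SSubU(y16 + (uint32_t)ub * u, bb));
    *g = NClipU6(SSubU(y16 + bg, (uint32_t)ug * u + (uint32_t)vg * v));
    *r = NClipU6(SSubU(y16 + (uint32_t)vr * v, br));
  }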
+
+void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
+ size_t avl = (size_t)4 * width;
+ do {
+ vuint16m8_t v_ar64;
+ vuint8m4_t v_argb;
+ size_t vl = __riscv_vsetvl_e8m4(avl);
+ v_argb = __riscv_vle8_v_u8m4(src_argb, vl);
+ v_ar64 = __riscv_vwaddu_vx_u16m8(v_argb, 0, vl);
+ v_ar64 = __riscv_vmul_vx_u16m8(v_ar64, 0x0101, vl);
+ __riscv_vse16_v_u16m8(dst_ar64, v_ar64, vl);
+ avl -= vl;
+ src_argb += vl;
+ dst_ar64 += vl;
+ } while (avl > 0);
+}
+
+void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
+ size_t avl = (size_t)width;
+ do {
+ vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m1(avl);
+ __riscv_vlseg4e8_v_u8m1(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ v_b_16 = __riscv_vwaddu_vx_u16m2(v_b, 0, vl);
+ v_g_16 = __riscv_vwaddu_vx_u16m2(v_g, 0, vl);
+ v_r_16 = __riscv_vwaddu_vx_u16m2(v_r, 0, vl);
+ v_a_16 = __riscv_vwaddu_vx_u16m2(v_a, 0, vl);
+ v_b_16 = __riscv_vmul_vx_u16m2(v_b_16, 0x0101, vl);
+ v_g_16 = __riscv_vmul_vx_u16m2(v_g_16, 0x0101, vl);
+ v_r_16 = __riscv_vmul_vx_u16m2(v_r_16, 0x0101, vl);
+ v_a_16 = __riscv_vmul_vx_u16m2(v_a_16, 0x0101, vl);
+ __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r_16, v_g_16, v_b_16, v_a_16, vl);
+ avl -= vl;
+ src_argb += 4 * vl;
+ dst_ab64 += 4 * vl;
+ } while (avl > 0);
+}
+
+void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
+ size_t avl = (size_t)4 * width;
+ do {
+ vuint16m8_t v_ar64;
+ vuint8m4_t v_argb;
+ size_t vl = __riscv_vsetvl_e16m8(avl);
+ v_ar64 = __riscv_vle16_v_u16m8(src_ar64, vl);
+ v_argb = __riscv_vnsrl_wx_u8m4(v_ar64, 8, vl);
+ __riscv_vse8_v_u8m4(dst_argb, v_argb, vl);
+ avl -= vl;
+ src_ar64 += vl;
+ dst_argb += vl;
+ } while (avl > 0);
+}
+
+void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
+ size_t avl = (size_t)width;
+ do {
+ vuint16m2_t v_b_16, v_g_16, v_r_16, v_a_16;
+ vuint8m1_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e16m2(avl);
+ __riscv_vlseg4e16_v_u16m2(&v_r_16, &v_g_16, &v_b_16, &v_a_16, src_ab64, vl);
+ v_b = __riscv_vnsrl_wx_u8m1(v_b_16, 8, vl);
+ v_g = __riscv_vnsrl_wx_u8m1(v_g_16, 8, vl);
+ v_r = __riscv_vnsrl_wx_u8m1(v_r_16, 8, vl);
+ v_a = __riscv_vnsrl_wx_u8m1(v_a_16, 8, vl);
+ __riscv_vsseg4e8_v_u8m1(dst_argb, v_b, v_g, v_r, v_a, vl);
+ avl -= vl;
+ src_ab64 += 4 * vl;
+ dst_argb += 4 * vl;
+ } while (avl > 0);
+}
+
+void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+
+void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_raw, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_rgba += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+
+void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_raw, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_raw += vl * 3;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+
+void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_raw, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_raw += vl * 3;
+ } while (w > 0);
+}
+
+void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
+ uint8_t* dst_rgb24,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+
+void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb24, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_rgb24 += vl * 3;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+
+void I444ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV444(vl, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV444(vl, v_u, v_v, v_y_16);
+ v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void I444ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV444(vl, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl;
+ src_v += vl;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+
+void I422ToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV422(vl, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV422(vl, v_u, v_v, v_y_16);
+ v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ src_a += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void I422ToRGBARow_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgba,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ READYUV422(vl, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_rgba += vl * 4;
+ } while (w > 0);
+}
+
+void I422ToRGB24Row_RVV(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t vl;
+ size_t w = (size_t)width;
+ uint8_t ub, vr, ug, vg;
+ int16_t yg, bb, bg, br;
+ vuint8m2_t v_u, v_v;
+ vuint8m2_t v_b, v_g, v_r;
+ vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ do {
+ READYUV422(vl, v_u, v_v, v_y_16);
+ YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
+ v_b_16, v_r_16);
+ RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_y += vl;
+ src_u += vl / 2;
+ src_v += vl / 2;
+ dst_rgb24 += vl * 3;
+ } while (w > 0);
+}
+
+void I400ToARGBRow_RVV(const uint8_t* src_y,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ const bool is_yb_positive = (yuvconstants->kRGBCoeffBias[4] >= 0);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ vuint16m4_t v_yb;
+ vuint16m4_t v_yg = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[0], vl);
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+  // register) is set to round-to-nearest-up mode(0).
+ asm volatile("csrwi vxrm, 0");
+ if (is_yb_positive) {
+ v_yb = __riscv_vmv_v_x_u16m4(yuvconstants->kRGBCoeffBias[4] - 32, vl);
+ } else {
+ v_yb = __riscv_vmv_v_x_u16m4(-yuvconstants->kRGBCoeffBias[4] + 32, vl);
+ }
+ do {
+ vuint8m2_t v_y, v_out;
+ vuint16m4_t v_y_16, v_tmp0, v_tmp1, v_tmp2;
+ vl = __riscv_vsetvl_e8m2(w);
+ v_y = __riscv_vle8_v_u8m2(src_y, vl);
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl);
+ v_tmp0 = __riscv_vmul_vx_u16m4(v_y_16, 0x0101, vl); // 257 * v_y
+ v_tmp1 = __riscv_vmulhu_vv_u16m4(v_tmp0, v_yg, vl);
+ if (is_yb_positive) {
+ v_tmp2 = __riscv_vsaddu_vv_u16m4(v_tmp1, v_yb, vl);
+ } else {
+ v_tmp2 = __riscv_vssubu_vv_u16m4(v_tmp1, v_yb, vl);
+ }
+ v_out = __riscv_vnclipu_wx_u8m2(v_tmp2, 6, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_out, v_out, v_out, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_y;
+ v_y = __riscv_vle8_v_u8m2(src_y, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_y, v_y, v_y, v_a, vl);
+ w -= vl;
+ src_y += vl;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+
+void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(w);
+ vuint8m8_t v_data = __riscv_vle8_v_u8m8(src, vl);
+ __riscv_vse8_v_u8m8(dst, v_data, vl);
+ w -= vl;
+ src += vl;
+ dst += vl;
+ } while (w > 0);
+}
+
+// Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1
+void InterpolateRow_RVV(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y1_fraction = source_y_fraction;
+ int y0_fraction = 256 - y1_fraction;
+ const uint8_t* src_ptr1 = src_ptr + src_stride;
+ size_t dst_w = (size_t)dst_width;
+ assert(source_y_fraction >= 0);
+ assert(source_y_fraction < 256);
+ // Blend 100 / 0 - Copy row unchanged.
+ if (y1_fraction == 0) {
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(dst_w);
+ __riscv_vse8_v_u8m8(dst_ptr, __riscv_vle8_v_u8m8(src_ptr, vl), vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+ return;
+ }
+ // Blend 50 / 50.
+ if (y1_fraction == 128) {
+ do {
+ size_t vl = __riscv_vsetvl_e8m8(dst_w);
+ vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl);
+ vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl);
+ // Averaging add
+ vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl);
+ __riscv_vse8_v_u8m8(dst_ptr, row_out, vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ src_ptr1 += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+ return;
+ }
+ // General purpose row blend.
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(dst_w);
+ vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl);
+ vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl);
+ vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl);
+ acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl);
+ __riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl);
+ dst_w -= vl;
+ src_ptr += vl;
+ src_ptr1 += vl;
+ dst_ptr += vl;
+ } while (dst_w > 0);
+}
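The general path above widens to 16 bits, accumulates both weighted rows, and narrows with a rounding shift; per byte it is equivalent to this scalar form:

  // 0 < f < 256; f == 0 and f == 128 take the copy/average fast paths above.
  static inline uint8_t BlendPixel(uint8_t s0, uint8_t s1, int f) {
    return (uint8_t)((s0 * (256 - f) + s1 * f + 128) >> 8);
  }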
+
+void SplitRGBRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_r, &v_g, &v_b, src_rgb, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_rgb += vl * 3;
+ } while (w > 0);
+}
+
+void MergeRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_rgb,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ __riscv_vsseg3e8_v_u8m2(dst_rgb, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ dst_rgb += vl * 3;
+ } while (w > 0);
+}
+
+void SplitARGBRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ uint8_t* dst_a,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vse8_v_u8m2(dst_a, v_a, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_a += vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_argb += vl * 4;
+ } while (w > 0);
+}
+
+void MergeARGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ vuint8m2_t v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ vuint8m2_t v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ vuint8m2_t v_a = __riscv_vle8_v_u8m2(src_a, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ src_a += vl;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+void SplitXRGBRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_r,
+ uint8_t* dst_g,
+ uint8_t* dst_b,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vse8_v_u8m2(dst_r, v_r, vl);
+ __riscv_vse8_v_u8m2(dst_g, v_g, vl);
+ __riscv_vse8_v_u8m2(dst_b, v_b, vl);
+ w -= vl;
+ dst_r += vl;
+ dst_g += vl;
+ dst_b += vl;
+ src_argb += vl * 4;
+ } while (w > 0);
+}
+
+void MergeXRGBRow_RVV(const uint8_t* src_r,
+ const uint8_t* src_g,
+ const uint8_t* src_b,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a = __riscv_vmv_v_x_u8m2(255u, vl);
+ do {
+ vuint8m2_t v_r, v_g, v_b;
+ v_r = __riscv_vle8_v_u8m2(src_r, vl);
+ v_g = __riscv_vle8_v_u8m2(src_g, vl);
+ v_b = __riscv_vle8_v_u8m2(src_b, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_r += vl;
+ src_g += vl;
+ src_b += vl;
+ dst_argb += vl * 4;
+ vl = __riscv_vsetvl_e8m2(w);
+ } while (w > 0);
+}
+
+void SplitUVRow_RVV(const uint8_t* src_uv,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ vuint8m4_t v_u, v_v;
+ __riscv_vlseg2e8_v_u8m4(&v_u, &v_v, src_uv, vl);
+ __riscv_vse8_v_u8m4(dst_u, v_u, vl);
+ __riscv_vse8_v_u8m4(dst_v, v_v, vl);
+ w -= vl;
+ dst_u += vl;
+ dst_v += vl;
+ src_uv += 2 * vl;
+ } while (w > 0);
+}
+
+void MergeUVRow_RVV(const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uv,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ vuint8m4_t v_u, v_v;
+ size_t vl = __riscv_vsetvl_e8m4(w);
+ v_u = __riscv_vle8_v_u8m4(src_u, vl);
+ v_v = __riscv_vle8_v_u8m4(src_v, vl);
+ __riscv_vsseg2e8_v_u8m4(dst_uv, v_u, v_v, vl);
+ w -= vl;
+ src_u += vl;
+ src_v += vl;
+ dst_uv += 2 * vl;
+ } while (w > 0);
+}
+
+struct RgbConstants {
+ uint8_t kRGBToY[4];
+ uint16_t kAddY;
+ uint16_t pad;
+};
+
+// RGB to JPEG coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
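These weights are 8-bit fixed point with the rounding term (and, for BT.601, the +16 offset) folded into kAddY, so each matrix-row kernel below reduces per pixel to the scalar form sketched here; e.g. white through kRgb24JPEGConstants gives (255*29 + 255*150 + 255*77 + 128) >> 8 = 255.

  // Scalar equivalent of the *_RVV matrix rows below (illustrative name).
  static inline uint8_t RGBToYScalar(uint8_t b, uint8_t g, uint8_t r,
                                     const struct RgbConstants* k) {
    return (uint8_t)((b * k->kRGBToY[0] + g * k->kRGBToY[1] +
                      r * k->kRGBToY[2] + k->kAddY) >> 8);
  }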
+
+// ARGB expects the first 3 values to contain RGB; the 4th value is ignored.
+void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors to store the RGBToY constants
+  vuint16m4_t v_addy;           // vector to store kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_argb += 4 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+
+void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants);
+}
+
+void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
+ ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants);
+}
+
+void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
+ ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants);
+}
+
+// RGBA expects the first value to be A (ignored), then 3 values to contain RGB.
+void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors to store the RGBToY constants
+  vuint16m4_t v_addy;           // vector to store kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_rgba += 4 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+
+void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants);
+}
+
+void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
+ RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
+ RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants);
+}
+
+void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
+ uint8_t* dst_y,
+ int width,
+ const struct RgbConstants* rgbconstants) {
+ assert(width != 0);
+ size_t w = (size_t)width;
+  vuint8m2_t v_by, v_gy, v_ry;  // vectors to store the RGBToY constants
+  vuint16m4_t v_addy;           // vector to store kAddY
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ v_by = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[0], vl);
+ v_gy = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[1], vl);
+ v_ry = __riscv_vmv_v_x_u8m2(rgbconstants->kRGBToY[2], vl);
+ v_addy = __riscv_vmv_v_x_u16m4(rgbconstants->kAddY, vl);
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_y;
+ vuint16m4_t v_y_u16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg3e8_v_u8m2(&v_b, &v_g, &v_r, src_rgb, vl);
+ v_y_u16 = __riscv_vwmulu_vv_u16m4(v_r, v_ry, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_gy, v_g, vl);
+ v_y_u16 = __riscv_vwmaccu_vv_u16m4(v_y_u16, v_by, v_b, vl);
+ v_y_u16 = __riscv_vadd_vv_u16m4(v_y_u16, v_addy, vl);
+ v_y = __riscv_vnsrl_wx_u8m2(v_y_u16, 8, vl);
+ __riscv_vse8_v_u8m2(dst_y, v_y, vl);
+ w -= vl;
+ src_rgb += 3 * vl;
+ dst_y += vl;
+ } while (w > 0);
+}
+
+void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
+}
+
+void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants);
+}
+
+void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants);
+}
+
+void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
+ RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
+}
+
+void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width) {
+ size_t w = (size_t)width;
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up(0).
+ asm volatile("csrwi vxrm, 0");
+ do {
+ vuint8m2_t v_b, v_g, v_r, v_a;
+ vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
+ v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
+ v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
+ v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl);
+ v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl);
+ v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
+
+#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
diff --git a/files/source/row_win.cc b/files/source/row_win.cc
index c7c1ff60..5fb28521 100644
--- a/files/source/row_win.cc
+++ b/files/source/row_win.cc
@@ -14,7 +14,9 @@
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
-#if defined(_M_X64)
+#if defined(_M_ARM64EC)
+#include <intrin.h>
+#elif defined(_M_X64)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
@@ -893,7 +895,7 @@ __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb,
__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
__asm {
@@ -940,7 +942,7 @@ __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb,
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_rgb,
- const uint32_t dither4,
+ uint32_t dither4,
int width) {
__asm {
mov eax, [esp + 4] // src_argb
@@ -2789,6 +2791,44 @@ __declspec(naked) void I422ToRGB24Row_SSSE3(
}
}
+// 8 pixels.
+// 8 UV values, mixed with 8 Y producing 8 RGB24 (24 bytes).
+__declspec(naked) void I444ToRGB24Row_SSSE3(
+ const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_rgb24,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ __asm {
+ push esi
+ push edi
+ push ebx
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+    mov        edx, [esp + 12 + 16]  // rgb24
+ mov ebx, [esp + 12 + 20] // yuvconstants
+ mov ecx, [esp + 12 + 24] // width
+ sub edi, esi
+ movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
+ movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
+
+ convertloop:
+ READYUV444
+ YUVTORGB(ebx)
+ STORERGB24
+
+ sub ecx, 8
+ jg convertloop
+
+ pop ebx
+ pop edi
+ pop esi
+ ret
+ }
+}
+
// 8 pixels
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
__declspec(naked) void I422ToRGB565Row_SSSE3(
@@ -3423,17 +3463,14 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
sub edx, eax
convertloop:
- vmovdqu ymm0, [eax] // read 32 U's
- vmovdqu ymm1, [eax + edx] // and 32 V's
- lea eax, [eax + 32]
- vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
- vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
- vextractf128 [edi], ymm2, 0 // bytes 0..15
- vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
- vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
- vextractf128 [edi + 48], ymm0, 1 // bytes 47..63
- lea edi, [edi + 64]
- sub ecx, 32
+ vpmovzxbw ymm0, [eax]
+ vpmovzxbw ymm1, [eax + edx]
+ lea eax, [eax + 16]
+ vpsllw ymm1, ymm1, 8
+ vpor ymm2, ymm1, ymm0
+ vmovdqu [edi], ymm2
+ lea edi, [edi + 32]
+ sub ecx, 16
jg convertloop
pop edi
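The replacement loop trades the unpack-and-extract shuffles for zero-extending loads: with U in the low byte and V shifted into the high byte of each 16-bit lane, a single OR produces the interleaved pairs in memory order, with no cross-lane fixup. The same idea in intrinsics form (a sketch for clarity, not the library's code):

  __m256i u16 = _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i*)src_u));
  __m256i v16 = _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i*)src_v));
  // Per 16-bit lane: uv = u | (v << 8)  ->  bytes U0 V0 U1 V1 ...
  _mm256_storeu_si256((__m256i*)dst_uv,
                      _mm256_or_si256(u16, _mm256_slli_epi16(v16, 8)));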
diff --git a/files/source/scale.cc b/files/source/scale.cc
index e1335f1e..80b030dc 100644
--- a/files/source/scale.cc
+++ b/files/source/scale.cc
@@ -198,6 +198,51 @@ static void ScalePlaneDown2_16(int src_width,
}
}
+void ScalePlaneDown2_16To8(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint8_t* dst_ptr,
+ int scale,
+ enum FilterMode filtering) {
+ int y;
+ void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
+ uint8_t* dst_ptr, int dst_width, int scale) =
+ (src_width & 1)
+ ? (filtering == kFilterNone
+ ? ScaleRowDown2_16To8_Odd_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
+ : ScaleRowDown2Box_16To8_Odd_C))
+ : (filtering == kFilterNone
+ ? ScaleRowDown2_16To8_C
+ : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
+ : ScaleRowDown2Box_16To8_C));
+ int row_stride = src_stride * 2;
+ (void)dst_height;
+ if (!filtering) {
+ src_ptr += src_stride; // Point to odd rows.
+ src_stride = 0;
+ }
+
+ if (filtering == kFilterLinear) {
+ src_stride = 0;
+ }
+ for (y = 0; y < src_height / 2; ++y) {
+ ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
+ src_ptr += row_stride;
+ dst_ptr += dst_stride;
+ }
+ if (src_height & 1) {
+ if (!filtering) {
+ src_ptr -= src_stride; // Point to last row.
+ }
+ ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
+ }
+}
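The row functions this dispatches to are not part of this hunk; going by their signature, scale is a 16.16 factor that maps the 16-bit source range down to 8 bits. A hypothetical shape for the unfiltered variant (name, pixel choice, and rounding are assumptions for illustration):

  static void ScaleRowDown2_16To8_Sketch(const uint16_t* src_ptr,
                                         ptrdiff_t src_stride,
                                         uint8_t* dst_ptr, int dst_width,
                                         int scale) {
    int x;
    (void)src_stride;  // unused without filtering
    for (x = 0; x < dst_width; ++x) {
      uint32_t v = ((uint32_t)src_ptr[x * 2 + 1] * (uint32_t)scale) >> 16;
      dst_ptr[x] = (v > 255) ? 255 : (uint8_t)v;
    }
  }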
+
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
@@ -775,9 +820,11 @@ static void ScaleAddCols2_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
- *dst_ptr++ =
- SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
- 16;
+ int scaletbl_index = boxwidth - minboxwidth;
+ assert((scaletbl_index == 0) || (scaletbl_index == 1));
+ *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
+ scaletbl[scaletbl_index] >>
+ 16);
}
}
@@ -797,9 +844,10 @@ static void ScaleAddCols2_16_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
- scaletbl[boxwidth - minboxwidth] >>
- 16;
+ int scaletbl_index = boxwidth - minboxwidth;
+ assert((scaletbl_index == 0) || (scaletbl_index == 1));
+ *dst_ptr++ =
+ SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[scaletbl_index] >> 16;
}
}
@@ -814,7 +862,7 @@ static void ScaleAddCols0_C(int dst_width,
(void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
- *dst_ptr++ = src_ptr[i] * scaleval >> 16;
+ *dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
}
}
@@ -829,7 +877,7 @@ static void ScaleAddCols1_C(int dst_width,
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
- *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
+ *dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}
@@ -1020,10 +1068,10 @@ void ScalePlaneBilinearDown(int src_width,
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1070,6 +1118,11 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
@@ -1143,10 +1196,10 @@ void ScalePlaneBilinearDown_16(int src_width,
const int max_y = (src_height - 1) << 16;
int j;
- void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
- void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1231,10 +1284,10 @@ void ScalePlaneBilinearUp(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1265,6 +1318,11 @@ void ScalePlaneBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_C;
@@ -1315,11 +1373,11 @@ void ScalePlaneBilinearUp(int src_width,
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1699,10 +1757,10 @@ void ScalePlaneBilinearUp_16(int src_width,
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
- void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1766,11 +1824,11 @@ void ScalePlaneBilinearUp_16(int src_width,
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
+ const int row_size = (dst_width + 31) & ~31;
+ align_buffer_64(row, row_size * 4);
uint16_t* rowptr = (uint16_t*)row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1827,7 +1885,7 @@ static void ScalePlaneSimple(int src_width,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
+ void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
int x, int dx) = ScaleCols_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1864,7 +1922,7 @@ static void ScalePlaneSimple_16(int src_width,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
int i;
- void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
+ void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
int x, int dx) = ScaleCols_16_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
diff --git a/files/source/scale_any.cc b/files/source/scale_any.cc
index 317041f8..f6576874 100644
--- a/files/source/scale_any.cc
+++ b/files/source/scale_any.cc
@@ -128,6 +128,22 @@ SDODD(ScaleRowDown2Box_Odd_NEON,
1,
15)
#endif
+#ifdef HAS_SCALEUVROWDOWN2_NEON
+SDANY(ScaleUVRowDown2_Any_NEON,
+ ScaleUVRowDown2_NEON,
+ ScaleUVRowDown2_C,
+ 2,
+ 2,
+ 7)
+#endif
+#ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON
+SDANY(ScaleUVRowDown2Linear_Any_NEON,
+ ScaleUVRowDown2Linear_NEON,
+ ScaleUVRowDown2Linear_C,
+ 2,
+ 2,
+ 7)
+#endif
#ifdef HAS_SCALEUVROWDOWN2BOX_NEON
SDANY(ScaleUVRowDown2Box_Any_NEON,
ScaleUVRowDown2Box_NEON,
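
These SDANY entries register Any wrappers for the new NEON UV rows: the SIMD kernel covers the part of the width that is a multiple of MASK+1 (8 UV pixels here) and the C row finishes the remainder, so callers never need width checks. The trailing parameters (2, 2, 7) read as downscale factor, bytes per pixel, and width mask; that reading, and the sketch below, are simplifications rather than the actual macro expansion, which stages the tail through a small temporary buffer:

#include <stddef.h>
#include <stdint.h>

typedef void (*UVRowFn)(const uint8_t* src, ptrdiff_t stride,
                        uint8_t* dst, int dst_width);

static void Down2_Any(const uint8_t* src, ptrdiff_t stride, uint8_t* dst,
                      int dst_width, UVRowFn simd, UVRowFn c_row) {
  int n = dst_width & ~7; /* multiple of 8 UV pixels (MASK == 7) */
  if (n > 0) {
    simd(src, stride, dst, n);
  }
  if (dst_width & 7) {
    /* factor 2 at 2 bytes per pixel: source advances 4 bytes per output */
    c_row(src + n * 4, stride, dst + n * 2, dst_width & 7);
  }
}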
diff --git a/files/source/scale_argb.cc b/files/source/scale_argb.cc
index 9c3acf7f..ddd8d29e 100644
--- a/files/source/scale_argb.cc
+++ b/files/source/scale_argb.cc
@@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
- src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
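
The int64_t-to-intptr_t changes in this file swap one widening cast for another. Both keep the int-by-int multiply from overflowing before it reaches pointer arithmetic, but intptr_t is the natural width for a pointer offset: 64-bit where pointers are 64-bit, and only 32-bit on 32-bit targets, which then avoid needless 64-bit math. A minimal sketch of the pattern:

#include <stdint.h>

/* Widen before multiplying: in plain int math, y * stride can overflow
   for large surfaces even though the resulting pointer would be valid. */
static const uint8_t* RowPtr(const uint8_t* base, int y, int stride) {
  return base + (intptr_t)y * stride;
}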
@@ -155,14 +155,14 @@ static void ScaleARGBDown4Box(int src_width,
int dy) {
int j;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@@ -187,9 +187,9 @@ static void ScaleARGBDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
- ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
+ ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
- ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
+ ScaleARGBRowDown2(row, row_size, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
@@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
- int row_stride = (dy >> 16) * (int64_t)src_stride;
+ ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
- src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
+ src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@@ -289,10 +289,10 @@ static void ScaleARGBBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@@ -348,6 +348,11 @@ static void ScaleARGBBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
@@ -388,7 +393,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+ const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@@ -421,10 +426,10 @@ static void ScaleARGBBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
@@ -468,6 +473,11 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (src_width >= 32768) {
ScaleARGBFilterCols =
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
@@ -545,14 +555,14 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
- const uint8_t* src = src_argb + yi * (int64_t)src_stride;
+ const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -570,7 +580,7 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
- src = src_argb + yi * (int64_t)src_stride;
+ src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -659,6 +669,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(src_width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -667,8 +685,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ I422ToARGBRow = I422ToARGBRow_RVV;
+ }
+#endif
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
@@ -711,8 +734,13 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
- void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
@@ -793,19 +821,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
- const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
- const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
- const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+ const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
+ const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+ const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
// Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 4 + 31) & ~31;
+ align_buffer_64(row, row_size * 2);
// Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4);
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
@@ -833,9 +861,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
- src_row_y = src_y + yi * (int64_t)src_stride_y;
- src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
- src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
+ src_row_y = src_y + yi * (intptr_t)src_stride_y;
+ src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
+ src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@@ -883,7 +911,7 @@ static void ScaleARGBSimple(int src_width,
int y,
int dy) {
int j;
- void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
(void)src_height;
@@ -926,7 +954,7 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
- ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
+ ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
@@ -962,7 +990,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src = src + (src_height - 1) * (int64_t)src_stride;
+ src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -977,7 +1005,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
- src += (clipf >> 16) * (int64_t)src_stride;
+ src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@@ -1011,7 +1039,7 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
- ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
+ ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
diff --git a/files/source/scale_common.cc b/files/source/scale_common.cc
index b02bdafd..77455903 100644
--- a/files/source/scale_common.cc
+++ b/files/source/scale_common.cc
@@ -23,6 +23,25 @@ namespace libyuv {
extern "C" {
#endif
+#ifdef __cplusplus
+#define STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#define STATIC_CAST(type, expr) (type)(expr)
+#endif
+
+// TODO(fbarchard): make clamp255 preserve negative values.
+static __inline int32_t clamp255(int32_t v) {
+ return (-(v >= 255) | v) & 255;
+}
+
+// Use scale to convert lsb formats to msb, depending on how many bits there are:
+// 32768 = 9 bits
+// 16384 = 10 bits
+// 4096 = 12 bits
+// 256 = 16 bits
+// TODO(fbarchard): change scale to bits
+#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
+
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
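
The new clamp255 is a branchless saturate for non-negative input: when v >= 255, the term -(v >= 255) is all ones, the OR forces every bit high, and the mask yields 255; otherwise both operations are no-ops. (The TODO is accurate: a negative v currently falls through to its low byte instead of clamping to 0.) C16TO8 then scales an N-bit sample so its maximum lands at 255 before clamping. A worked check using the 10-bit entry from the table above:

#include <assert.h>
#include <stdint.h>

static int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

int main(void) {
  assert(clamp255(0) == 0);     /* in range: untouched */
  assert(clamp255(200) == 200);
  assert(clamp255(300) == 255); /* above range: saturated */
  /* 10-bit white (1023) at scale 16384: (1023 * 16384) >> 16 == 255 */
  assert(clamp255((1023 * 16384) >> 16) == 255);
  return 0;
}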
@@ -62,6 +81,50 @@ void ScaleRowDown2_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ }
+}
+
+void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
+ dst += 2;
+ src_ptr += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
+ dst += 1;
+ src_ptr += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
+}
+
void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -98,6 +161,52 @@ void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ }
+}
+
+void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ int x;
+ (void)src_stride;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
+ dst += 2;
+ s += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
+ dst += 1;
+ s += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
+}
+
void ScaleRowDown2Box_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
@@ -160,6 +269,61 @@ void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
}
}
+void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst[1] = STATIC_CAST(uint8_t,
+ C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ }
+}
+
+void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width,
+ int scale) {
+ const uint16_t* s = src_ptr;
+ const uint16_t* t = src_ptr + src_stride;
+ int x;
+ assert(scale >= 256);
+ assert(scale <= 32768);
+ dst_width -= 1;
+ for (x = 0; x < dst_width - 1; x += 2) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst[1] = STATIC_CAST(uint8_t,
+ C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
+ dst += 2;
+ s += 4;
+ t += 4;
+ }
+ if (dst_width & 1) {
+ dst[0] = STATIC_CAST(uint8_t,
+ C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
+ dst += 1;
+ s += 2;
+ t += 2;
+ }
+ dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
+}
+
void ScaleRowDown4_C(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
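
All three *_Odd_C variants added above share one trick: the last destination pixel of an odd-width row has no complete source box, so dst_width is decremented, the usual loop covers the rest, and the tail pixel is built from whatever source samples remain (a lone sample for the point and linear rows, a vertical pair for the box row). A simplified, hypothetical 2-to-1 linear example of that shape:

#include <stdint.h>

/* Simplified sketch; the real rows also unroll two outputs per step. */
void Down2Linear_Odd(const uint8_t* s, uint8_t* d, int dst_width) {
  int x;
  dst_width -= 1; /* reserve the last output pixel */
  for (x = 0; x < dst_width; ++x) {
    d[x] = (uint8_t)((s[0] + s[1] + 1) >> 1); /* rounded pair average */
    s += 2;
  }
  d[dst_width] = s[0]; /* tail: only one source sample exists */
}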
@@ -1116,18 +1280,13 @@ void ScaleUVRowDown2_C(const uint8_t* src_uv,
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width) {
- const uint16_t* src = (const uint16_t*)(src_uv);
- uint16_t* dst = (uint16_t*)(dst_uv);
int x;
(void)src_stride;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[1];
- dst[1] = src[3];
- src += 2;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[1];
+ for (x = 0; x < dst_width; ++x) {
+ dst_uv[0] = src_uv[2]; // Store the 2nd UV
+ dst_uv[1] = src_uv[3];
+ src_uv += 4;
+ dst_uv += 2;
}
}
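
This rewrite drops the uint16_t reinterpretation of the UV byte stream (an alignment and strict-aliasing hazard) in favor of plain byte copies, while keeping the same sampling policy: take the second UV pixel of each input pair. A usage sketch, assuming the function as defined above:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

void ScaleUVRowDown2_C(const uint8_t*, ptrdiff_t, uint8_t*, int);

int main(void) {
  /* Four UV pixels: U0V0 U1V1 U2V2 U3V3 */
  const uint8_t src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t dst[4];
  ScaleUVRowDown2_C(src, 0, dst, 2);
  assert(dst[0] == 3 && dst[1] == 4); /* kept U1V1 */
  assert(dst[2] == 7 && dst[3] == 8); /* kept U3V3 */
  return 0;
}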
@@ -1469,7 +1628,7 @@ void ScalePlaneVertical(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher bpp.
int dst_width_bytes = dst_width * bpp;
- void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
+ void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1519,6 +1678,12 @@ void ScalePlaneVertical(int src_height,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
+
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
@@ -1548,7 +1713,7 @@ void ScalePlaneVertical_16(int src_height,
enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
- void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
+ void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
@@ -1627,7 +1792,7 @@ void ScalePlaneVertical_16To8(int src_height,
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
// TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
- void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
+ void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
ptrdiff_t src_stride, int scale, int dst_width,
int source_y_fraction) = InterpolateRow_16To8_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
diff --git a/files/source/scale_gcc.cc b/files/source/scale_gcc.cc
index edaf2e29..17eeffad 100644
--- a/files/source/scale_gcc.cc
+++ b/files/source/scale_gcc.cc
@@ -1094,7 +1094,8 @@ void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr,
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)), // %4
"m"(kLinearShuffleFar) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
+ "xmm7");
}
#endif
@@ -1294,7 +1295,7 @@ void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr,
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "m"(kLinearMadd31) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif
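
Both hunks here correct clobber lists in GCC extended asm: the 12-bit bilinear kernel evidently touches xmm7, which must therefore be declared, while the 8-bit linear kernel uses only xmm0 through xmm4, so the surplus entries are dropped. Under-declaring is the dangerous direction, since the compiler may keep a live value in a register the template silently overwrites. Minimal sketch (x86-64 with SSE2 assumed):

#include <stdint.h>

static void ZeroFill16(uint8_t* dst) {
  __asm__ volatile(
      "pxor %%xmm0, %%xmm0 \n\t"
      "movdqu %%xmm0, (%0) \n\t"
      :
      : "r"(dst)
      : "memory", "xmm0"); /* declare both the store and the register */
}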
diff --git a/files/source/scale_mmi.cc b/files/source/scale_mmi.cc
deleted file mode 100644
index 1226ef3e..00000000
--- a/files/source/scale_mmi.cc
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Mips MMI.
-#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-// clang-format off
-
-// CPU agnostic row functions
-void ScaleRowDown2_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
- const uint64_t shift = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlh %[src0], %[src0], %[shift] \n\t"
-
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlh %[src1], %[src1], %[shift] \n\t"
-
- "packushb %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift)
- : "memory");
-}
-
-void ScaleRowDown2Linear_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest0, dest1;
-
- const uint64_t mask = 0x00ff00ff00ff00ffULL;
- const uint64_t shift = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "and %[dest0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "and %[dest1], %[src1], %[mask] \n\t"
- "packushb %[dest0], %[dest0], %[dest1] \n\t"
-
- "psrlh %[src0], %[src0], %[shift] \n\t"
- "psrlh %[src1], %[src1], %[shift] \n\t"
- "packushb %[dest1], %[src0], %[src1] \n\t"
-
- "pavgb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest0] "=&f"(dest0),
- [dest1] "=&f"(dest1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [mask] "f"(mask),
- [shift] "f"(shift), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleRowDown2Box_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- const uint8_t* s = src_ptr;
- const uint8_t* t = src_ptr + src_stride;
-
- uint64_t s0, s1, t0, t1;
- uint64_t dest, dest0, dest1;
-
- const uint64_t ph = 0x0002000200020002ULL;
- const uint64_t mask = 0x00ff00ff00ff00ffULL;
- const uint64_t shift0 = 0x2ULL;
- const uint64_t shift1 = 0x8ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "psrlh %[s1], %[s0], %[shift1] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "psrlh %[t1], %[t0], %[shift1] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddh %[dest0], %[s0], %[s1] \n\t"
- "paddh %[dest0], %[dest0], %[t0] \n\t"
- "paddh %[dest0], %[dest0], %[t1] \n\t"
- "paddh %[dest0], %[dest0], %[ph] \n\t"
- "psrlh %[dest0], %[dest0], %[shift0] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "psrlh %[s1], %[s0], %[shift1] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "psrlh %[t1], %[t0], %[shift1] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddh %[dest1], %[s0], %[s1] \n\t"
- "paddh %[dest1], %[dest1], %[t0] \n\t"
- "paddh %[dest1], %[dest1], %[t1] \n\t"
- "paddh %[dest1], %[dest1], %[ph] \n\t"
- "psrlh %[dest1], %[dest1], %[shift0] \n\t"
-
- "packushb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph),
- [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleARGBRowDown2_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- const uint32_t* src = (const uint32_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
-
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpckhwd %[dest], %[src0], %[src1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleARGBRowDown2Linear_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "lwc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "punpcklwd %[dest_lo], %[src0], %[src1] \n\t"
- "lwc1 %[src0], 0x04(%[src_ptr]) \n\t"
- "lwc1 %[src1], 0x0c(%[src_ptr]) \n\t"
- "punpcklwd %[dest_hi], %[src0], %[src1] \n\t"
-
- "pavgb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleARGBRowDown2Box_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- const uint8_t* s = src_argb;
- const uint8_t* t = src_argb + src_stride;
-
- uint64_t s0, s_hi, s_lo;
- uint64_t t0, t_hi, t_lo;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t mask = 0x0ULL;
- const uint64_t ph = 0x0002000200020002ULL;
- const uint64_t shfit = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "punpcklbh %[s_lo], %[s0], %[mask] \n\t"
- "punpckhbh %[s_hi], %[s0], %[mask] \n\t"
- "paddh %[dest_lo], %[s_lo], %[s_hi] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "punpcklbh %[t_lo], %[t0], %[mask] \n\t"
- "punpckhbh %[t_hi], %[t0], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[t_lo] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[t_hi] \n\t"
-
- "paddh %[dest_lo], %[dest_lo], %[ph] \n\t"
- "psrlh %[dest_lo], %[dest_lo], %[shfit] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "punpcklbh %[s_lo], %[s0], %[mask] \n\t"
- "punpckhbh %[s_hi], %[s0], %[mask] \n\t"
- "paddh %[dest_hi], %[s_lo], %[s_hi] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "punpcklbh %[t_lo], %[t0], %[mask] \n\t"
- "punpckhbh %[t_hi], %[t0], %[mask] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[t_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[t_hi] \n\t"
-
- "paddh %[dest_hi], %[dest_hi], %[ph] \n\t"
- "psrlh %[dest_hi], %[dest_hi], %[shfit] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [t0] "=&f"(t0), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo),
- [t_hi] "=&f"(t_hi), [t_lo] "=&f"(t_lo), [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width),
- [mask] "f"(mask), [ph] "f"(ph), [shfit] "f"(shfit)
- : "memory");
-}
-
-void ScaleRowDown2_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
- const uint64_t shift = 0x10ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
-
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
-
- "packsswh %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift)
- : "memory");
-}
-
-void ScaleRowDown2Linear_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpcklhw %[dest_lo], %[src0], %[src1] \n\t"
- "punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
-
- "punpcklhw %[src0], %[dest_lo], %[dest_hi] \n\t"
- "punpckhhw %[src1], %[dest_lo], %[dest_hi] \n\t"
-
- "pavgh %[dest], %[src0], %[src1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleRowDown2Box_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* s = src_ptr;
- const uint16_t* t = src_ptr + src_stride;
-
- uint64_t s0, s1, s_hi, s_lo;
- uint64_t t0, t1, t_hi, t_lo;
- uint64_t dest, dest0, dest1;
-
- const uint64_t ph = 0x0000000200000002ULL;
- const uint64_t mask = 0x0000ffff0000ffffULL;
- const uint64_t shift0 = 0x10ULL;
- const uint64_t shift1 = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[s0], 0x00(%[s]) \n\t"
- "gsldlc1 %[s0], 0x07(%[s]) \n\t"
- "psrlw %[s1], %[s0], %[shift0] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x00(%[t]) \n\t"
- "gsldlc1 %[t0], 0x07(%[t]) \n\t"
- "psrlw %[t1], %[t0], %[shift0] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddw %[dest0], %[s0], %[s1] \n\t"
- "paddw %[dest0], %[dest0], %[t0] \n\t"
- "paddw %[dest0], %[dest0], %[t1] \n\t"
- "paddw %[dest0], %[dest0], %[ph] \n\t"
- "psrlw %[dest0], %[dest0], %[shift1] \n\t"
-
- "gsldrc1 %[s0], 0x08(%[s]) \n\t"
- "gsldlc1 %[s0], 0x0f(%[s]) \n\t"
- "psrlw %[s1], %[s0], %[shift0] \n\t"
- "and %[s0], %[s0], %[mask] \n\t"
-
- "gsldrc1 %[t0], 0x08(%[t]) \n\t"
- "gsldlc1 %[t0], 0x0f(%[t]) \n\t"
- "psrlw %[t1], %[t0], %[shift0] \n\t"
- "and %[t0], %[t0], %[mask] \n\t"
-
- "paddw %[dest1], %[s0], %[s1] \n\t"
- "paddw %[dest1], %[dest1], %[t0] \n\t"
- "paddw %[dest1], %[dest1], %[t1] \n\t"
- "paddw %[dest1], %[dest1], %[ph] \n\t"
- "psrlw %[dest1], %[dest1], %[shift1] \n\t"
-
- "packsswh %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[s], %[s], 0x10 \n\t"
- "daddiu %[t], %[t], 0x10 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [s0] "=&f"(s0), [s1] "=&f"(s1), [t0] "=&f"(t0), [t1] "=&f"(t1),
- [s_hi] "=&f"(s_hi), [s_lo] "=&f"(s_lo), [t_hi] "=&f"(t_hi),
- [t_lo] "=&f"(t_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
- [dest] "=&f"(dest)
- : [s] "r"(s), [t] "r"(t), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift0] "f"(shift0), [shift1] "f"(shift1), [ph] "f"(ph),
- [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleRowDown4_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t shift = 0x10ULL;
- const uint64_t mask = 0x000000ff000000ffULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
- "and %[src0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
- "and %[src1], %[src1], %[mask] \n\t"
- "packsswh %[dest_lo], %[src0], %[src1] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
- "psrlw %[src0], %[src0], %[shift] \n\t"
- "and %[src0], %[src0], %[mask] \n\t"
- "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
- "psrlw %[src1], %[src1], %[shift] \n\t"
- "and %[src1], %[src1], %[mask] \n\t"
- "packsswh %[dest_hi], %[src0], %[src1] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [shift] "f"(shift), [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleRowDown4_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest_hi, dest_lo;
-
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "punpckhhw %[dest_lo], %[src0], %[src1] \n\t"
- "punpcklhw %[dest_lo], %[dest_lo], %[mask] \n\t"
-
- "gsldrc1 %[src0], 0x10(%[src_ptr]) \n\t"
- "gsldlc1 %[src0], 0x17(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x18(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x1f(%[src_ptr]) \n\t"
- "punpckhhw %[dest_hi], %[src0], %[src1] \n\t"
- "punpcklhw %[dest_hi], %[dest_hi], %[mask] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest_hi] "=&f"(dest_hi),
- [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [mask] "f"(mask)
- : "memory");
-}
-
-#define DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
-
-#define DO_SCALEROWDOWN4BOX_LOOP(reg) \
- "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \
- "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \
- \
- "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_PUNPCKADD() \
- \
- "pmaddhw %[dest_lo], %[dest_lo], %[mask1] \n\t" \
- "pmaddhw %[dest_hi], %[dest_hi], %[mask1] \n\t" \
- "packsswh " #reg ", %[dest_lo], %[dest_hi] \n\t" \
- "pmaddhw " #reg ", " #reg ", %[mask1] \n\t" \
- "paddh " #reg ", " #reg ", %[ph] \n\t" \
- "psrlh " #reg ", " #reg ", %[shift] \n\t" \
- \
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \
- "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t"
-
-/* LibYUVScaleTest.ScaleDownBy4_Box */
-void ScaleRowDown4Box_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- const uint8_t* src0_ptr = src_ptr;
- const uint8_t* src1_ptr = src_ptr + src_stride;
- const uint8_t* src2_ptr = src_ptr + src_stride * 2;
- const uint8_t* src3_ptr = src_ptr + src_stride * 3;
-
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3;
-
- const uint64_t mask0 = 0x0ULL;
- const uint64_t mask1 = 0x0001000100010001ULL;
- const uint64_t ph = 0x0008000800080008ULL;
- const uint64_t shift = 0x4ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- DO_SCALEROWDOWN4BOX_LOOP(%[dest0])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest1])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest2])
- DO_SCALEROWDOWN4BOX_LOOP(%[dest3])
-
- "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
- "packsswh %[dest_hi], %[dest2], %[dest3] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0),
- [ph] "f"(ph), [mask1] "f"(mask1)
- : "memory");
-}
-
-#define DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- "punpcklbh %[src_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[src_hi], %[src], %[mask0] \n\t" \
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t" \
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
-
-#define DO_SCALEROWDOWN4BOX_16_LOOP(reg) \
- "ldc1 %[src], 0x00(%[src0_ptr]) \n\t" \
- "punpcklbh %[dest_lo], %[src], %[mask0] \n\t" \
- "punpckhbh %[dest_hi], %[src], %[mask0] \n\t" \
- \
- "ldc1 %[src], 0x00(%[src1_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src2_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "ldc1 %[src], 0x00(%[src3_ptr]) \n\t" \
- DO_SCALEROWDOWN4BOX_16_PUNPCKADD() \
- \
- "paddw %[dest], %[dest_lo], %[dest_hi] \n\t" \
- "punpckhwd %[dest_hi], %[dest], %[dest] \n\t" \
- "paddw %[dest], %[dest_hi], %[dest] \n\t" \
- "paddw %[dest], %[dest], %[ph] \n\t" \
- "psraw %[dest], %[dest], %[shift] \n\t" \
- "and " #reg ", %[dest], %[mask1] \n\t" \
- \
- "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t" \
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t" \
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t" \
- "daddiu %[src3_ptr], %[src3_ptr], 0x08 \n\t"
-
-/* LibYUVScaleTest.ScaleDownBy4_Box_16 */
-void ScaleRowDown4Box_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* src0_ptr = src_ptr;
- const uint16_t* src1_ptr = src_ptr + src_stride;
- const uint16_t* src2_ptr = src_ptr + src_stride * 2;
- const uint16_t* src3_ptr = src_ptr + src_stride * 3;
-
- uint64_t src, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1, dest2, dest3;
-
- const uint64_t mask0 = 0x0ULL;
- const uint64_t mask1 = 0x00000000ffffffffULL;
- const uint64_t ph = 0x0000000800000008ULL;
- const uint64_t shift = 0x04ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest0])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest1])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest2])
- DO_SCALEROWDOWN4BOX_16_LOOP(%[dest3])
- "punpcklwd %[dest_lo], %[dest0], %[dest1] \n\t"
- "punpcklwd %[dest_hi], %[dest2], %[dest3] \n\t"
-
- "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
- [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [src2_ptr] "r"(src2_ptr), [src3_ptr] "r"(src3_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [shift] "f"(shift), [mask0] "f"(mask0),
- [ph] "f"(ph), [mask1] "f"(mask1)
- : "memory");
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_MMI(uint8_t* dst_ptr,
- const uint8_t* src_ptr,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest;
-
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src], 0x00(%[src_ptr]) \n\t"
-
- "punpcklbh %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleColsUp2_16_MMI(uint16_t* dst_ptr,
- const uint16_t* src_ptr,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest;
-
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
-
- "punpcklhw %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "punpckhhw %[dest], %[src], %[src] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src] "=&f"(src), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleAddRow_MMI(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
- uint64_t src, src_hi, src_lo, dest0, dest1;
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src], %[mask] \n\t"
- "punpckhbh %[src_hi], %[src], %[mask] \n\t"
-
- "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "paddush %[dest0], %[dest0], %[src_lo] \n\t"
- "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "paddush %[dest1], %[dest1], %[src_hi] \n\t"
-
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [src] "=&f"(src)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width),
- [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleAddRow_16_MMI(const uint16_t* src_ptr,
- uint32_t* dst_ptr,
- int src_width) {
- uint64_t src, src_hi, src_lo, dest0, dest1;
- const uint64_t mask = 0x0ULL;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "punpcklhw %[src_lo], %[src], %[mask] \n\t"
- "punpckhhw %[src_hi], %[src], %[mask] \n\t"
-
- "gsldrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "paddw %[dest0], %[dest0], %[src_lo] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
-
- "gsldrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
- "gsldlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "paddw %[dest1], %[dest1], %[src_hi] \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src_hi] "=&f"(src_hi),
- [src_lo] "=&f"(src_lo), [src] "=&f"(src)
- : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(src_width),
- [mask] "f"(mask)
- : "memory");
-}
-
-void ScaleARGBRowDownEven_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
-
- uint64_t src0, src1, dest;
-
- __asm__ volatile(
- "1: \n\t"
- "lwc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t"
- "lwc1 %[src1], 0x00(%[src_ptr]) \n\t"
- "punpcklwd %[dest], %[src0], %[src1] \n\t"
-
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[src_ptr], %[src_ptr], %[src_stepx_4]\n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb),
- [src_stepx_4] "r"(src_stepx << 2), [width] "r"(dst_width)
- : "memory");
-}
-
-void ScaleARGBRowDownEvenBox_MMI(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- const uint8_t* src0_ptr = src_argb;
- const uint8_t* src1_ptr = src_argb + src_stride;
-
- uint64_t src0, src1, src_hi, src_lo;
- uint64_t dest, dest_hi, dest_lo, dest0, dest1;
-
- const uint64_t mask = 0x0ULL;
- const uint64_t ph = 0x0002000200020002ULL;
- const uint64_t shift = 0x2ULL;
-
- __asm__ volatile(
- "1: \n\t"
-
- "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[mask] \n\t"
- "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src0], %[mask] \n\t"
-
- "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src1], %[mask] \n\t"
- "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t"
- "punpcklbh %[src_hi], %[src1], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
- "paddh %[dest0], %[dest_hi], %[dest_lo] \n\t"
- "paddh %[dest0], %[dest0], %[ph] \n\t"
- "psrlh %[dest0], %[dest0], %[shift] \n\t"
-
- "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t"
- "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t"
-
- "lwc1 %[src0], 0x00(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_lo], %[src0], %[mask] \n\t"
- "lwc1 %[src0], 0x04(%[src0_ptr]) \n\t"
- "punpcklbh %[dest_hi], %[src0], %[mask] \n\t"
-
- "lwc1 %[src1], 0x00(%[src1_ptr]) \n\t"
- "punpcklbh %[src_lo], %[src1], %[mask] \n\t"
- "lwc1 %[src1], 0x04(%[src1_ptr]) \n\t"
- "punpcklbh %[src_hi], %[src1], %[mask] \n\t"
- "paddh %[dest_lo], %[dest_lo], %[src_lo] \n\t"
- "paddh %[dest_hi], %[dest_hi], %[src_hi] \n\t"
- "paddh %[dest1], %[dest_hi], %[dest_lo] \n\t"
- "paddh %[dest1], %[dest1], %[ph] \n\t"
- "psrlh %[dest1], %[dest1], %[shift] \n\t"
-
- "packushb %[dest], %[dest0], %[dest1] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[src0_ptr], %[src0_ptr], %[src_stepx_4] \n\t"
- "dadd %[src1_ptr], %[src1_ptr], %[src_stepx_4] \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
- "daddi %[width], %[width], -0x02 \n\t"
- "bnez %[width], 1b \n\t"
- : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
- [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
- [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
- [src1] "=&f"(src1), [dest] "=&f"(dest)
- : [src0_ptr] "r"(src0_ptr), [src1_ptr] "r"(src1_ptr),
- [dst_ptr] "r"(dst_argb), [width] "r"(dst_width),
- [src_stepx_4] "r"(src_stepx << 2), [shift] "f"(shift), [mask] "f"(mask),
- [ph] "f"(ph)
- : "memory");
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_MMI(uint8_t* dst_argb,
- const uint8_t* src_argb,
- int dst_width,
- int x,
- int dx) {
- const uint32_t* src = (const uint32_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
-
- const uint32_t* src_tmp;
-
- uint64_t dest, offset;
-
- const uint64_t shift0 = 16;
- const uint64_t shift1 = 2;
-
- __asm__ volatile(
- "1: \n\t"
- "srav %[offset], %[x], %[shift0] \n\t"
- "sllv %[offset], %[offset], %[shift1] \n\t"
- "dadd %[src_tmp], %[src_ptr], %[offset] \n\t"
- "lwc1 %[dest], 0x00(%[src_tmp]) \n\t"
- "swc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- "dadd %[x], %[x], %[dx] \n\t"
-
- "daddiu %[dst_ptr], %[dst_ptr], 0x04 \n\t"
- "daddi %[width], %[width], -0x01 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest] "=&f"(dest), [offset] "=&r"(offset), [src_tmp] "=&r"(src_tmp)
- : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [width] "r"(dst_width),
- [dx] "r"(dx), [x] "r"(x), [shift0] "r"(shift0), [shift1] "r"(shift1)
- : "memory");
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_MMI(uint8_t* dst_argb,
- const uint8_t* src_argb,
- int dst_width,
- int x,
- int dx) {
- uint64_t src, dest0, dest1;
- (void)x;
- (void)dx;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
- "punpcklwd %[dest0], %[src], %[src] \n\t"
- "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
- "punpckhwd %[dest1], %[src], %[src] \n\t"
- "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x04 \n\t"
- "bnez %[width], 1b \n\t"
- : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src] "=&f"(src)
- : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(dst_width)
- : "memory");
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-/* LibYUVBaseTest.TestFixedDiv */
-int FixedDiv_MIPS(int num, int div) {
- int quotient = 0;
- const int shift = 16;
-
- asm(
- "dsll %[num], %[num], %[shift] \n\t"
- "ddiv %[num], %[div] \t\n"
- "mflo %[quo] \t\n"
- : [quo] "+&r"(quotient)
- : [num] "r"(num), [div] "r"(div), [shift] "r"(shift));
-
- return quotient;
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-/* LibYUVScaleTest.ARGBScaleTo320x240_Linear */
-int FixedDiv1_MIPS(int num, int div) {
- int quotient = 0;
- const int shift = 16;
- const int val1 = 1;
- const int64_t val11 = 0x00010001ULL;
-
- asm(
- "dsll %[num], %[num], %[shift] \n\t"
- "dsub %[num], %[num], %[val11] \n\t"
- "dsub %[div], %[div], %[val1] \n\t"
- "ddiv %[num], %[div] \t\n"
- "mflo %[quo] \t\n"
- : [quo] "+&r"(quotient)
- : [num] "r"(num), [div] "r"(div), [val1] "r"(val1), [val11] "r"(val11),
- [shift] "r"(shift));
-
- return quotient;
-}
-
-// Read 8x2 upsample with filtering and write 16x1.
-// actually reads an extra pixel, so 9x2.
-void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
- ptrdiff_t src_stride,
- uint16_t* dst,
- int dst_width) {
- const uint16_t* src2_ptr = src_ptr + src_stride;
-
- uint64_t src0, src1;
- uint64_t dest, dest04, dest15, dest26, dest37;
- uint64_t tmp0, tmp1, tmp2, tmp3;
-
- const uint64_t mask0 = 0x0003000900030009ULL;
- const uint64_t mask1 = 0x0001000300010003ULL;
- const uint64_t mask2 = 0x0009000300090003ULL;
- const uint64_t mask3 = 0x0003000100030001ULL;
- const uint64_t ph = 0x0000000800000008ULL;
- const uint64_t shift = 4;
-
- __asm__ volatile(
- "1: \n\t"
- "gsldrc1 %[src0], 0x00(%[src1_ptr]) \n\t"
- "gsldlc1 %[src0], 0x07(%[src1_ptr]) \n\t"
- "pmaddhw %[dest04], %[src0], %[mask0] \n\t"
- "gsldrc1 %[src1], 0x00(%[src2_ptr]) \n\t"
- "gsldlc1 %[src1], 0x07(%[src2_ptr]) \n\t"
- "pmaddhw %[dest], %[src1], %[mask1] \n\t"
- "paddw %[dest04], %[dest04], %[dest] \n\t"
- "paddw %[dest04], %[dest04], %[ph] \n\t"
- "psrlw %[dest04], %[dest04], %[shift] \n\t"
-
- "pmaddhw %[dest15], %[src0], %[mask2] \n\t"
- "pmaddhw %[dest], %[src1], %[mask3] \n\t"
- "paddw %[dest15], %[dest15], %[dest] \n\t"
- "paddw %[dest15], %[dest15], %[ph] \n\t"
- "psrlw %[dest15], %[dest15], %[shift] \n\t"
-
- "gsldrc1 %[src0], 0x02(%[src1_ptr]) \n\t"
- "gsldlc1 %[src0], 0x09(%[src1_ptr]) \n\t"
- "pmaddhw %[dest26], %[src0], %[mask0] \n\t"
- "gsldrc1 %[src1], 0x02(%[src2_ptr]) \n\t"
- "gsldlc1 %[src1], 0x09(%[src2_ptr]) \n\t"
- "pmaddhw %[dest], %[src1], %[mask1] \n\t"
- "paddw %[dest26], %[dest26], %[dest] \n\t"
- "paddw %[dest26], %[dest26], %[ph] \n\t"
- "psrlw %[dest26], %[dest26], %[shift] \n\t"
-
- "pmaddhw %[dest37], %[src0], %[mask2] \n\t"
- "pmaddhw %[dest], %[src1], %[mask3] \n\t"
- "paddw %[dest37], %[dest37], %[dest] \n\t"
- "paddw %[dest37], %[dest37], %[ph] \n\t"
- "psrlw %[dest37], %[dest37], %[shift] \n\t"
-
- /* tmp0 = ( 00 04 02 06 ) */
- "packsswh %[tmp0], %[dest04], %[dest26] \n\t"
- /* tmp1 = ( 01 05 03 07 ) */
- "packsswh %[tmp1], %[dest15], %[dest37] \n\t"
-
- /* tmp2 = ( 00 01 04 05 )*/
- "punpcklhw %[tmp2], %[tmp0], %[tmp1] \n\t"
- /* tmp3 = ( 02 03 06 07 )*/
- "punpckhhw %[tmp3], %[tmp0], %[tmp1] \n\t"
-
- /* ( 00 01 02 03 ) */
- "punpcklwd %[dest], %[tmp2], %[tmp3] \n\t"
- "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
-
- /* ( 04 05 06 07 ) */
- "punpckhwd %[dest], %[tmp2], %[tmp3] \n\t"
- "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
- "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
- "daddiu %[src2_ptr], %[src2_ptr], 0x08 \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x08 \n\t"
- "bnez %[width], 1b \n\t"
- : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest04] "=&f"(dest04),
- [dest15] "=&f"(dest15), [dest26] "=&f"(dest26), [dest37] "=&f"(dest37),
- [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
- [tmp3] "=&f"(tmp3), [dest] "=&f"(dest)
- : [src1_ptr] "r"(src_ptr), [src2_ptr] "r"(src2_ptr), [dst_ptr] "r"(dst),
- [width] "r"(dst_width), [mask0] "f"(mask0), [mask1] "f"(mask1),
- [mask2] "f"(mask2), [mask3] "f"(mask3), [shift] "f"(shift), [ph] "f"(ph)
- : "memory");
-}
-
-void ScaleRowDown34_MMI(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- (void)src_stride;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- uint64_t src[2];
- uint64_t tmp[2];
- __asm__ volatile (
- "1: \n\t"
- "gsldlc1 %[src0], 0x07(%[src_ptr]) \n\t"
- "gsldrc1 %[src0], 0x00(%[src_ptr]) \n\t"
- "gsldlc1 %[src1], 0x0f(%[src_ptr]) \n\t"
- "gsldrc1 %[src1], 0x08(%[src_ptr]) \n\t"
- "and %[tmp1], %[src0], %[mask1] \n\t"
- "psrlw %[tmp0], %[src0], %[rmov] \n\t"
- "psllw %[tmp0], %[tmp0], %[lmov1] \n\t"
- "or %[src0], %[tmp0], %[tmp1] \n\t"
- "punpckhwd %[tmp0], %[src0], %[src0] \n\t"
- "psllw %[tmp1], %[tmp0], %[rmov] \n\t"
- "or %[src0], %[src0], %[tmp1] \n\t"
- "psrlw %[tmp0], %[tmp0], %[rmov8] \n\t"
- "pextrh %[tmp0], %[tmp0], %[zero] \n\t"
- "pinsrh_2 %[src0], %[src0], %[tmp0] \n\t"
- "pextrh %[tmp0], %[src1], %[zero] \n\t"
- "pinsrh_3 %[src0], %[src0], %[tmp0] \n\t"
-
- "punpckhwd %[tmp0], %[src1], %[src1] \n\t"
- "pextrh %[tmp1], %[tmp0], %[zero] \n\t"
- "psrlw %[src1], %[src1], %[rmov] \n\t"
- "psllw %[tmp1], %[tmp1], %[rmov8] \n\t"
- "or %[src1], %[src1], %[tmp1] \n\t"
- "and %[tmp0], %[tmp0], %[mask2] \n\t"
- "or %[src1], %[src1], %[tmp0] \n\t"
-
- "gssdlc1 %[src0], 0x07(%[dst_ptr]) \n\t"
- "gssdrc1 %[src0], 0x00(%[dst_ptr]) \n\t"
- "gsswlc1 %[src1], 0x0b(%[dst_ptr]) \n\t"
- "gsswrc1 %[src1], 0x08(%[dst_ptr]) \n\t"
-
- "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
- "daddi %[width], %[width], -0x0c \n\t"
- "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
- "bnez %[width], 1b \n\t"
-
- : [src0]"=&f"(src[0]), [src1]"=&f"(src[1]),
- [tmp0]"=&f"(tmp[0]), [tmp1]"=&f"(tmp[1])
- : [src_ptr]"r"(src_ptr), [dst_ptr]"r"(dst),
- [lmov]"f"(0xc), [rmov]"f"(0x18),
- [mask1]"f"(0xffff0000ffff), [rmov8]"f"(0x8),
- [zero]"f"(0x0), [mask2]"f"(0xff000000),
- [width]"r"(dst_width), [lmov1]"f"(0x10)
- : "memory"
- );
-}
-// clang-format on
-
-#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/files/source/scale_neon.cc b/files/source/scale_neon.cc
index 6a0d6e1b..ccc75106 100644
--- a/files/source/scale_neon.cc
+++ b/files/source/scale_neon.cc
@@ -1428,6 +1428,45 @@ void ScaleARGBFilterCols_NEON(uint8_t* dst_argb,
#undef LOAD2_DATA32_LANE
+void ScaleUVRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels.
+ "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vst1.16 {q1}, [%1]! \n" // store 8 UV
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1");
+}
+
+void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "vld2.16 {d0, d2}, [%0]! \n" // load 8 UV pixels.
+ "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vrhadd.u8 q0, q0, q1 \n" // rounding half add
+ "vst1.16 {q0}, [%1]! \n" // store 8 UV
+ "bgt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "q0", "q1");
+}
+
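For reference, `vld2.16` de-interleaves alternating 16-bit UV pairs into q0 (the even pixels) and q1 (the odd pixels); the unfiltered kernel above simply stores the odd pixels, while the linear kernel averages neighbours with `vrhadd.u8`, a per-byte rounding halving add. A minimal scalar model in Python (illustrative only, not part of the library):

  # Scalar model of the two ARM32 UV downsample-by-2 kernels above.
  # A "UV pixel" is a (u, v) pair of 8-bit values.
  def scale_uv_row_down2(row):
    # vld2.16 splits even/odd pairs; st1 {q1} keeps the odd ones.
    return row[1::2]

  def scale_uv_row_down2_linear(row):
    # vrhadd.u8 computes (a + b + 1) >> 1 per channel.
    return [((u0 + u1 + 1) >> 1, (v0 + v1 + 1) >> 1)
            for (u0, v0), (u1, v1) in zip(row[0::2], row[1::2])]

  row = [(0, 255), (10, 245), (20, 235), (30, 225)]
  print(scale_uv_row_down2(row))         # [(10, 245), (30, 225)]
  print(scale_uv_row_down2_linear(row))  # [(5, 250), (25, 230)]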
void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/files/source/scale_neon64.cc b/files/source/scale_neon64.cc
index 9f9636e6..ad06ee83 100644
--- a/files/source/scale_neon64.cc
+++ b/files/source/scale_neon64.cc
@@ -1568,6 +1568,45 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
);
}
+void ScaleUVRowDown2_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
+ "st1 {v1.8h}, [%1], #16 \n" // store 8 UV
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1");
+}
+
+void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint8_t* dst,
+ int dst_width) {
+ (void)src_stride;
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.8h,v1.8h}, [%0], #32 \n" // load 16 UV
+ "subs %w2, %w2, #8 \n" // 8 processed per loop.
+ "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
+ "st1 {v0.8h}, [%1], #16 \n" // store 8 UV
+ "b.gt 1b \n"
+ : "+r"(src_ptr), // %0
+ "+r"(dst), // %1
+ "+r"(dst_width) // %2
+ :
+ : "memory", "cc", "v0", "v1");
+}
+
void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/files/source/scale_uv.cc b/files/source/scale_uv.cc
index 3b3d7b8e..1556071d 100644
--- a/files/source/scale_uv.cc
+++ b/files/source/scale_uv.cc
@@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
- src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
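The `(int64_t)` to `(intptr_t)` changes throughout this file compute the row offset in an integer exactly as wide as a pointer: wide enough that `(y >> 16) * src_stride` cannot overflow for any plane that fits in the address space, without forcing 64-bit multiplies on 32-bit targets. A small Python sketch of the overflow that plain 32-bit int arithmetic would risk on 64-bit systems:

  y = 40000 << 16        # vertical position in 16.16 fixed point
  src_stride = 65536     # bytes per row

  offset = (y >> 16) * src_stride   # 2,621,440,000 bytes
  wrapped = offset & 0xffffffff     # what a 32-bit int would hold
  if wrapped >= 1 << 31:
    wrapped -= 1 << 32
  print(offset, wrapped)            # 2621440000 -1673527296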
@@ -112,6 +112,22 @@ static void ScaleUVDown2(int src_width,
}
}
#endif
+#if defined(HAS_SCALEUVROWDOWN2_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleUVRowDown2 =
+ filtering == kFilterNone
+ ? ScaleUVRowDown2_Any_NEON
+ : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
+ : ScaleUVRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleUVRowDown2 =
+ filtering == kFilterNone
+ ? ScaleUVRowDown2_NEON
+ : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
+ : ScaleUVRowDown2Box_NEON);
+ }
+ }
+#endif
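The nested ternaries above map the filter mode to a row kernel, preferring the aligned variant when dst_width is a multiple of 8 and falling back to the _Any_ variant otherwise; kFilterBilinear and kFilterBox both land on the Box kernel. An equivalent table-driven sketch in Python, with the C identifiers as stand-in strings:

  KERNELS = {
      'kFilterNone':     ('ScaleUVRowDown2_Any_NEON', 'ScaleUVRowDown2_NEON'),
      'kFilterLinear':   ('ScaleUVRowDown2Linear_Any_NEON',
                          'ScaleUVRowDown2Linear_NEON'),
      'kFilterBilinear': ('ScaleUVRowDown2Box_Any_NEON',
                          'ScaleUVRowDown2Box_NEON'),
      'kFilterBox':      ('ScaleUVRowDown2Box_Any_NEON',
                          'ScaleUVRowDown2Box_NEON'),
  }

  def pick_kernel(filtering, dst_width):
    any_variant, aligned_variant = KERNELS[filtering]
    return aligned_variant if dst_width % 8 == 0 else any_variant

  print(pick_kernel('kFilterLinear', 640))  # ScaleUVRowDown2Linear_NEON
  print(pick_kernel('kFilterBox', 634))     # ScaleUVRowDown2Box_Any_NEON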
// This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
@@ -130,23 +146,7 @@ static void ScaleUVDown2(int src_width,
}
}
#endif
-// This code is not enabled. Only box filter is available at this time.
-#if defined(HAS_SCALEUVROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleUVRowDown2 =
- filtering == kFilterNone
- ? ScaleUVRowDown2_Any_NEON
- : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
- : ScaleUVRowDown2Box_Any_NEON);
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleUVRowDown2 =
- filtering == kFilterNone
- ? ScaleUVRowDown2_NEON
- : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
- : ScaleUVRowDown2Box_NEON);
- }
- }
-#endif
+
#if defined(HAS_SCALEUVROWDOWN2_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleUVRowDown2 =
@@ -193,14 +193,14 @@ static void ScaleUVDown4Box(int src_width,
int dy) {
int j;
// Allocate 2 rows of UV.
- const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 * 2 + 15) & ~15;
+ align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
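`row_size` uses the usual power-of-two round-up, `(n + 15) & ~15`, so each scratch row is padded to a whole number of 16-byte vectors. A quick Python check of the idiom:

  def round_up16(n):
    # Add 15, then clear the low four bits.
    return (n + 15) & ~15

  for n in (1, 16, 17, 100):
    print(n, '->', round_up16(n))   # 1->16, 16->16, 17->32, 100->112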
@@ -234,9 +234,9 @@ static void ScaleUVDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) {
ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
- ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
+ ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size,
dst_width * 2);
- ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
+ ScaleUVRowDown2(row, row_size, dst_uv, dst_width);
src_uv += row_stride;
dst_uv += dst_stride;
}
@@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
- int row_stride = (dy >> 16) * (int64_t)src_stride;
+ ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
- src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
+ src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@@ -338,10 +338,10 @@ static void ScaleUVBilinearDown(int src_width,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
@@ -397,6 +397,11 @@ static void ScaleUVBilinearDown(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
@@ -429,7 +434,7 @@ static void ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
- const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+ const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@@ -464,10 +469,10 @@ static void ScaleUVBilinearUp(int src_width,
int dy,
enum FilterMode filtering) {
int j;
- void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
- void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
+ void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
@@ -511,6 +516,11 @@ static void ScaleUVBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
if (src_width >= 32768) {
ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
}
@@ -571,14 +581,14 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
- const uint8_t* src = src_uv + yi * (int64_t)src_stride;
+ const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
- const int kRowSize = (dst_width * 2 + 15) & ~15;
- align_buffer_64(row, kRowSize * 2);
+ const int row_size = (dst_width * 2 + 15) & ~15;
+ align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row;
- int rowstride = kRowSize;
+ int rowstride = row_size;
int lasty = yi;
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -596,7 +606,7 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
- src = src_uv + yi * (int64_t)src_stride;
+ src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -644,32 +654,32 @@ void ScaleUVLinearUp2(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2LINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_SSSE3;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_Any_NEON;
}
#endif
if (dst_height == 1) {
- ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+ ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
- ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+ ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
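Vertical stepping here is 16.16 fixed point: `dy` is the per-output-row step, `y >> 16` selects the source row, and the low 16 bits are the blend fraction. A worked Python sketch, assuming FixedDiv is libyuv's usual `(num << 16) / div`:

  def fixed_div(num, div):
    return (num << 16) // div     # 16.16 fixed-point quotient

  src_height, dst_height = 3, 5   # upscale 3 source rows to 5 output rows
  dy = fixed_div(src_height - 1, dst_height - 1)
  y = (1 << 15) - 1               # start half a source row in
  for _ in range(dst_height):
    print('row', y >> 16, 'fraction %.3f' % ((y & 0xffff) / 65536.0))
    y += dy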
@@ -697,19 +707,19 @@ void ScaleUVBilinearUp2(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2BILINEAR_SSSE3
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_SSSE3;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_Any_NEON;
}
@@ -751,32 +761,32 @@ void ScaleUVLinearUp2_16(int src_width,
// This function can only scale up by 2 times horizontally.
assert(src_width == ((dst_width + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_SSE41;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2LINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowUp = ScaleUVRowUp2_Linear_16_Any_NEON;
}
#endif
if (dst_height == 1) {
- ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
+ ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
- ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
+ ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@@ -804,19 +814,19 @@ void ScaleUVBilinearUp2_16(int src_width,
assert(src_width == ((dst_width + 1) / 2));
assert(src_height == ((dst_height + 1) / 2));
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_SSE41
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41
if (TestCpuFlag(kCpuHasSSE41)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_SSE41;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_AVX2
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_AVX2;
}
#endif
-#ifdef HAS_SCALEUVROWUP2BILINEAR_16_NEON
+#ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleUVRowUp2_Bilinear_16_Any_NEON;
}
@@ -854,7 +864,7 @@ static void ScaleUVSimple(int src_width,
int y,
int dy) {
int j;
- void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
+ void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
int x, int dx) =
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
(void)src_height;
@@ -889,7 +899,7 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
- ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
+ ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
@@ -910,7 +920,7 @@ static int UVCopy(const uint8_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
- src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
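UVCopy follows libyuv's flip convention: a negative height repoints the source at its last row and negates the stride, so the copy then walks upward. A compact Python model:

  def flipped_view(rows, height, stride=1):
    if height < 0:
      height = -height
      start = (height - 1) * stride   # point at the last row
      stride = -stride                # and walk upward
    else:
      start = 0
    return [rows[start + i * stride] for i in range(height)]

  print(flipped_view(['r0', 'r1', 'r2'], -3))  # ['r2', 'r1', 'r0']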
@@ -930,7 +940,7 @@ static int UVCopy_16(const uint16_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
- src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@@ -968,7 +978,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src = src + (src_height - 1) * (int64_t)src_stride;
+ src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -983,7 +993,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
- src += (clipf >> 16) * (int64_t)src_stride;
+ src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@@ -1024,7 +1034,7 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
- UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
+ UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
@@ -1039,7 +1049,7 @@ static void ScaleUV(const uint8_t* src,
dst_stride, src, dst, x, y, dy, /*bpp=*/2, filtering);
return;
}
- if (filtering && (dst_width + 1) / 2 == src_width) {
+ if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
ScaleUVLinearUp2(src_width, src_height, clip_width, clip_height, src_stride,
dst_stride, src, dst);
return;
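The tightened condition matters because `filtering` is an enum in which every mode other than kFilterNone is truthy, so the old test also sent kFilterBilinear and kFilterBox requests with a 2x width ratio down the linear-only path. In Python terms:

  kFilterNone, kFilterLinear, kFilterBilinear, kFilterBox = range(4)

  filtering = kFilterBox
  print(bool(filtering))             # True:  old check took the 2x-linear path
  print(filtering == kFilterLinear)  # False: new check keeps Box on bilinear code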
@@ -1118,7 +1128,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
- src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
+ src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@@ -1126,20 +1136,20 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
- UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
+ UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
- UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
- dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
- dst_height);
+ UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
+ (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
+ dst_width, dst_height);
}
return 0;
}
#endif
- if (filtering && (dst_width + 1) / 2 == src_width) {
+ if ((filtering == kFilterLinear) && ((dst_width + 1) / 2 == src_width)) {
ScaleUVLinearUp2_16(src_width, src_height, dst_width, dst_height,
src_stride_uv, dst_stride_uv, src_uv, dst_uv);
return 0;
diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/files/tools_libyuv/autoroller/roll_deps.py
index 977c86de..2b57eb65 100755
--- a/files/tools_libyuv/autoroller/roll_deps.py
+++ b/files/tools_libyuv/autoroller/roll_deps.py
@@ -1,18 +1,14 @@
#!/usr/bin/env vpython3
-# Copyright 2017 The LibYuv Project Authors. All rights reserved.
+# Copyright (c) 2017 The LibYUV project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
+# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
+"""Script to automatically roll dependencies in the LibYUV DEPS file."""
-# This is a modified copy of the script in
-# https://webrtc.googlesource.com/src/+/master/tools_webrtc/autoroller/roll_deps.py
-# customized for libyuv.
-
-"""Script to automatically roll dependencies in the libyuv DEPS file."""
import argparse
import base64
@@ -25,9 +21,46 @@ import sys
import urllib.request
+def FindSrcDirPath():
+ """Returns the abs path to the src/ dir of the project."""
+ src_dir = os.path.dirname(os.path.abspath(__file__))
+ while os.path.basename(src_dir) != 'src':
+ src_dir = os.path.normpath(os.path.join(src_dir, os.pardir))
+ return src_dir
+
+
# Skip these dependencies (list without solution name prefix).
DONT_AUTOROLL_THESE = [
- 'src/third_party/gflags/src',
+ 'src/third_party/gflags/src',
+ 'src/third_party/mockito/src',
+]
+
+# These dependencies are missing in chromium/src/DEPS, either unused or already
+# in-tree. For instance, src/base is a part of the Chromium source git repo,
+# but we pull it through a subtree mirror, so it isn't listed in
+# Chromium's deps but it is in ours.
+LIBYUV_ONLY_DEPS = [
+ 'src/base',
+ 'src/build',
+ 'src/buildtools',
+ 'src/ios',
+ 'src/testing',
+ 'src/third_party',
+ 'src/third_party/android_support_test_runner',
+ 'src/third_party/bazel',
+ 'src/third_party/bouncycastle',
+ 'src/third_party/errorprone/lib',
+ 'src/third_party/findbugs',
+ 'src/third_party/gson',
+ 'src/third_party/gtest-parallel',
+ 'src/third_party/guava',
+ 'src/third_party/intellij',
+ 'src/third_party/jsr-305/src',
+ 'src/third_party/ow2_asm',
+ 'src/third_party/proguard',
+ 'src/third_party/ub-uiautomator/lib',
+ 'src/tools',
+ 'src/tools/clang/dsymutil',
]
LIBYUV_URL = 'https://chromium.googlesource.com/libyuv/libyuv'
@@ -37,16 +70,22 @@ CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s'
CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s'
COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$')
-CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([0-9a-z-]+)\'$')
+CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([-0-9a-z]+)\'$')
ROLL_BRANCH_NAME = 'roll_chromium_revision'
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-CHECKOUT_SRC_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.pardir,
- os.pardir))
+CHECKOUT_SRC_DIR = FindSrcDirPath()
CHECKOUT_ROOT_DIR = os.path.realpath(os.path.join(CHECKOUT_SRC_DIR, os.pardir))
+# Copied from tools/android/roll/android_deps/.../BuildConfigGenerator.groovy.
+ANDROID_DEPS_START = r'=== ANDROID_DEPS Generated Code Start ==='
+ANDROID_DEPS_END = r'=== ANDROID_DEPS Generated Code End ==='
+# Location of automatically gathered android deps.
+ANDROID_DEPS_PATH = 'src/third_party/android_deps/'
+
sys.path.append(os.path.join(CHECKOUT_SRC_DIR, 'build'))
-import find_depot_tools # pylint: disable=wrong-import-position
+import find_depot_tools
+
find_depot_tools.add_depot_tools_to_path()
CLANG_UPDATE_SCRIPT_URL_PATH = 'tools/clang/scripts/update.py'
@@ -56,11 +95,26 @@ CLANG_UPDATE_SCRIPT_LOCAL_PATH = os.path.join(CHECKOUT_SRC_DIR, 'tools',
DepsEntry = collections.namedtuple('DepsEntry', 'path url revision')
ChangedDep = collections.namedtuple('ChangedDep',
'path url current_rev new_rev')
+CipdDepsEntry = collections.namedtuple('CipdDepsEntry', 'path packages')
+VersionEntry = collections.namedtuple('VersionEntry', 'version')
+ChangedCipdPackage = collections.namedtuple(
+ 'ChangedCipdPackage', 'path package current_version new_version')
+ChangedVersionEntry = collections.namedtuple(
+ 'ChangedVersionEntry', 'path current_version new_version')
+
+ChromiumRevisionUpdate = collections.namedtuple('ChromiumRevisionUpdate',
+ ('current_chromium_rev '
+ 'new_chromium_rev '))
+
class RollError(Exception):
pass
+def StrExpansion():
+ return lambda str_value: str_value
+
+
def VarLookup(local_scope):
return lambda var_name: local_scope['vars'][var_name]
@@ -68,9 +122,9 @@ def VarLookup(local_scope):
def ParseDepsDict(deps_content):
local_scope = {}
global_scope = {
- 'Var': VarLookup(local_scope),
- 'Str': lambda s: s,
- 'deps_os': {},
+ 'Str': StrExpansion(),
+ 'Var': VarLookup(local_scope),
+ 'deps_os': {},
}
exec(deps_content, global_scope, local_scope)
return local_scope
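DEPS files are executable Python, so ParseDepsDict simply exec()s the content with stub `Str()` and `Var()` helpers; `Var('name')` resolves against the file's own vars dict. A hypothetical miniature DEPS file it would accept:

  deps_content = (
      "vars = {'chromium_revision': 'abc123'}\n"
      "deps = {\n"
      "  'src/build': 'https://chromium.googlesource.com/chromium/src/build.git@'\n"
      "               + Var('chromium_revision'),\n"
      "}\n")
  local_scope = ParseDepsDict(deps_content)
  print(local_scope['deps']['src/build'])   # ...build.git@abc123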
@@ -82,11 +136,6 @@ def ParseLocalDepsFile(filename):
return ParseDepsDict(deps_content)
-def ParseRemoteCrDepsFile(revision):
- deps_content = ReadRemoteCrFile('DEPS', revision)
- return ParseDepsDict(deps_content)
-
-
def ParseCommitPosition(commit_message):
for line in reversed(commit_message.splitlines()):
m = COMMIT_POSITION_RE.match(line.strip())
@@ -97,15 +146,18 @@ def ParseCommitPosition(commit_message):
sys.exit(-1)
-def _RunCommand(command, working_dir=None, ignore_exit_code=False,
- extra_env=None, input_data=None):
+def _RunCommand(command,
+ working_dir=None,
+ ignore_exit_code=False,
+ extra_env=None,
+ input_data=None):
"""Runs a command and returns the output from that command.
- If the command fails (exit code != 0), the function will exit the process.
+ If the command fails (exit code != 0), the function will exit the process.
- Returns:
- A tuple containing the stdout and stderr outputs as strings.
- """
+ Returns:
+ A tuple containing the stdout and stderr outputs as strings.
+ """
working_dir = working_dir or CHECKOUT_SRC_DIR
logging.debug('CMD: %s CWD: %s', ' '.join(command), working_dir)
env = os.environ.copy()
@@ -134,9 +186,9 @@ def _RunCommand(command, working_dir=None, ignore_exit_code=False,
def _GetBranches():
"""Returns a tuple of active,branches.
- The 'active' is the name of the currently active branch and 'branches' is a
- list of all branches.
- """
+ The 'active' is the name of the currently active branch and 'branches' is a
+ list of all branches.
+ """
lines = _RunCommand(['git', 'branch'])[0].split('\n')
branches = []
active = ''
@@ -160,9 +212,16 @@ def _ReadGitilesContent(url):
def ReadRemoteCrFile(path_below_src, revision):
- """Reads a remote Chromium file of a specific revision. Returns a string."""
- return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE % (revision,
- path_below_src))
+ """Reads a remote Chromium file of a specific revision.
+
+ Args:
+ path_below_src: A path to the target file relative to src dir.
+ revision: Revision to read.
+ Returns:
+ A string with file content.
+ """
+ return _ReadGitilesContent(CHROMIUM_FILE_TEMPLATE %
+ (revision, path_below_src))
def ReadRemoteCrCommit(revision):
@@ -171,7 +230,13 @@ def ReadRemoteCrCommit(revision):
def ReadUrlContent(url):
- """Connect to a remote host and read the contents. Returns a list of lines."""
+ """Connect to a remote host and read the contents.
+
+ Args:
+ url: URL to connect to.
+ Returns:
+ A list of lines.
+ """
conn = urllib.request.urlopen(url)
try:
return conn.readlines()
@@ -185,52 +250,172 @@ def ReadUrlContent(url):
def GetMatchingDepsEntries(depsentry_dict, dir_path):
"""Gets all deps entries matching the provided path.
- This list may contain more than one DepsEntry object.
- Example: dir_path='src/testing' would give results containing both
- 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's DEPS.
- Example 2: dir_path='src/build' should return 'src/build' but not
- 'src/buildtools'.
+ This list may contain more than one DepsEntry object.
+ Example: dir_path='src/testing' would give results containing both
+ 'src/testing/gtest' and 'src/testing/gmock' deps entries for Chromium's
+ DEPS.
+ Example 2: dir_path='src/build' should return 'src/build' but not
+ 'src/buildtools'.
- Returns:
- A list of DepsEntry objects.
- """
+ Returns:
+ A list of DepsEntry objects.
+ """
result = []
for path, depsentry in depsentry_dict.items():
if path == dir_path:
result.append(depsentry)
else:
parts = path.split('/')
- if all(part == parts[i]
- for i, part in enumerate(dir_path.split('/'))):
+ if all(part == parts[i] for i, part in enumerate(dir_path.split('/'))):
result.append(depsentry)
return result
+
def BuildDepsentryDict(deps_dict):
"""Builds a dict of paths to DepsEntry objects from a raw deps dict."""
result = {}
def AddDepsEntries(deps_subdict):
- for path, deps_url_spec in deps_subdict.items():
- if isinstance(deps_url_spec, dict):
- if deps_url_spec.get('dep_type') == 'cipd':
- continue
- deps_url = deps_url_spec['url']
+ for path, dep in deps_subdict.items():
+ if path in result:
+ continue
+ if not isinstance(dep, dict):
+ dep = {'url': dep}
+ if dep.get('dep_type') == 'cipd':
+ result[path] = CipdDepsEntry(path, dep['packages'])
else:
- deps_url = deps_url_spec
- if not path in result:
- url, revision = deps_url.split('@') if deps_url else (None, None)
+ if '@' not in dep['url']:
+ continue
+ url, revision = dep['url'].split('@')
result[path] = DepsEntry(path, url, revision)
+ def AddVersionEntry(vars_subdict):
+ for key, value in vars_subdict.items():
+ if key in result:
+ continue
+ if not key.endswith('_version'):
+ continue
+ key = re.sub('_version$', '', key)
+ result[key] = VersionEntry(value)
+
AddDepsEntries(deps_dict['deps'])
- for deps_os in ['win', 'mac', 'linux', 'android', 'ios', 'unix']:
+  for deps_os in ['win', 'mac', 'unix', 'android', 'ios']:
AddDepsEntries(deps_dict.get('deps_os', {}).get(deps_os, {}))
+ AddVersionEntry(deps_dict.get('vars', {}))
return result
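After this change BuildDepsentryDict yields three entry kinds: plain git deps become DepsEntry(path, url, revision), CIPD deps keep their package list as a CipdDepsEntry, and every `*_version` var is surfaced as a VersionEntry keyed by the name minus the suffix. A hypothetical input/output pair:

  deps_dict = {
      'deps': {
          'src/build':
              'https://chromium.googlesource.com/chromium/src/build.git@f00d',
          'src/third_party/android_deps/libs/guava': {
              'dep_type': 'cipd',
              'packages': [{'package': 'example/guava', 'version': 'v1'}],
          },
      },
      'vars': {'gn_version': 'git_revision:cafe'},
  }
  entries = BuildDepsentryDict(deps_dict)
  # entries['src/build']  -> DepsEntry(path, url, 'f00d')
  # entries['src/third_party/android_deps/libs/guava'] -> CipdDepsEntry(...)
  # entries['gn']         -> VersionEntry('git_revision:cafe')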
+def _FindChangedCipdPackages(path, old_pkgs, new_pkgs):
+ old_pkgs_names = {p['package'] for p in old_pkgs}
+ new_pkgs_names = {p['package'] for p in new_pkgs}
+ pkgs_equal = (old_pkgs_names == new_pkgs_names)
+ added_pkgs = [p for p in new_pkgs_names if p not in old_pkgs_names]
+ removed_pkgs = [p for p in old_pkgs_names if p not in new_pkgs_names]
+
+ assert pkgs_equal, ('Old: %s\n New: %s.\nYou need to do a manual roll '
+ 'and remove/add entries in DEPS so the old and new '
+ 'list match.\nMost likely, you should add \"%s\" and '
+ 'remove \"%s\"' %
+ (old_pkgs, new_pkgs, added_pkgs, removed_pkgs))
+
+ for old_pkg in old_pkgs:
+ for new_pkg in new_pkgs:
+ old_version = old_pkg['version']
+ new_version = new_pkg['version']
+ if (old_pkg['package'] == new_pkg['package']
+ and old_version != new_version):
+ logging.debug('Roll dependency %s to %s', path, new_version)
+ yield ChangedCipdPackage(path, old_pkg['package'], old_version,
+ new_version)
+
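Each CIPD entry carries a list of {package, version} dicts; the helper asserts the package sets match and yields one ChangedCipdPackage per version bump. For example (package name made up):

  old = [{'package': 'example/tools/cas', 'version': 'v1'}]
  new = [{'package': 'example/tools/cas', 'version': 'v2'}]
  for change in _FindChangedCipdPackages('src/tools/cas', old, new):
    print(change.path, change.current_version, '->', change.new_version)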
+
+def _FindChangedVars(name, old_version, new_version):
+ if old_version != new_version:
+ logging.debug('Roll dependency %s to %s', name, new_version)
+ yield ChangedVersionEntry(name, old_version, new_version)
+
+
+def _FindNewDeps(old, new):
+  """Gather dependencies only in `new` and return the corresponding paths."""
+ old_entries = set(BuildDepsentryDict(old))
+ new_entries = set(BuildDepsentryDict(new))
+ return [
+ path for path in new_entries - old_entries
+ if path not in DONT_AUTOROLL_THESE
+ ]
+
+
+def FindAddedDeps(libyuv_deps, new_cr_deps):
+ """
+ Calculate new deps entries of interest.
+
+  Ideally, that would mean: deps that only appear in the Chromium DEPS
+  file but are transitively used by LibYUV.
+
+  Since that is hard to compute, we restrict ourselves to a well-defined
+  subset: deps sitting in `ANDROID_DEPS_PATH`.
+  Anything else is assumed to be a Chromium-only dependency.
+
+ Args:
+ libyuv_deps: dict of deps as defined in the LibYUV DEPS file.
+ new_cr_deps: dict of deps as defined in the chromium DEPS file.
+
+  Caveat: Doesn't detect a new package in an existing dep.
+
+ Returns:
+ A tuple consisting of:
+ A list of paths added dependencies sitting in `ANDROID_DEPS_PATH`.
+ A list of paths for other added dependencies.
+ """
+ all_added_deps = _FindNewDeps(libyuv_deps, new_cr_deps)
+ generated_android_deps = [
+ path for path in all_added_deps if path.startswith(ANDROID_DEPS_PATH)
+ ]
+ other_deps = [
+ path for path in all_added_deps if path not in generated_android_deps
+ ]
+ return generated_android_deps, other_deps
+
+
+def FindRemovedDeps(libyuv_deps, new_cr_deps):
+ """
+ Calculate obsolete deps entries.
+
+  Ideally, that would mean: deps that no longer appear in the Chromium
+  DEPS file and are not used by LibYUV.
+
+  Since it's hard to compute:
+  1/ We restrict ourselves to a well-defined subset:
+    deps sitting in `ANDROID_DEPS_PATH`.
+  2/ We rely on the existing behavior of CalculateChangedDeps,
+    i.e. assume non-CIPD dependencies are LibYUV-only and don't remove them.
+
+ Args:
+ libyuv_deps: dict of deps as defined in the LibYUV DEPS file.
+ new_cr_deps: dict of deps as defined in the chromium DEPS file.
+
+  Caveat: Doesn't detect a deleted package in an existing dep.
+
+ Returns:
+ A tuple consisting of:
+ A list of paths of dependencies removed from `ANDROID_DEPS_PATH`.
+ A list of paths of unexpected disappearing dependencies.
+ """
+ all_removed_deps = _FindNewDeps(new_cr_deps, libyuv_deps)
+ generated_android_deps = sorted(
+ [path for path in all_removed_deps if path.startswith(ANDROID_DEPS_PATH)])
+  # LibYUV-only dependencies are handled in CalculateChangedDeps.
+ other_deps = sorted([
+ path for path in all_removed_deps
+ if path not in generated_android_deps and path not in LIBYUV_ONLY_DEPS
+ ])
+ return generated_android_deps, other_deps
+
+
def CalculateChangedDeps(libyuv_deps, new_cr_deps):
"""
- Calculate changed deps entries based on entries defined in the libyuv DEPS
- file:
+ Calculate changed deps entries based on entries defined in the LibYUV DEPS
+ file:
- If a shared dependency with the Chromium DEPS file: roll it to the same
revision as Chromium (i.e. entry in the new_cr_deps dict)
- If it's a Chromium sub-directory, roll it to the HEAD revision (notice
@@ -239,9 +424,9 @@ def CalculateChangedDeps(libyuv_deps, new_cr_deps):
- If it's another DEPS entry (not shared with Chromium), roll it to HEAD
unless it's configured to be skipped.
- Returns:
- A list of ChangedDep objects representing the changed deps.
- """
+ Returns:
+ A list of ChangedDep objects representing the changed deps.
+ """
result = []
libyuv_entries = BuildDepsentryDict(libyuv_deps)
new_cr_entries = BuildDepsentryDict(new_cr_deps)
@@ -250,68 +435,117 @@ def CalculateChangedDeps(libyuv_deps, new_cr_deps):
continue
cr_deps_entry = new_cr_entries.get(path)
if cr_deps_entry:
+ assert type(cr_deps_entry) is type(libyuv_deps_entry)
+
+ if isinstance(cr_deps_entry, CipdDepsEntry):
+ result.extend(
+ _FindChangedCipdPackages(path, libyuv_deps_entry.packages,
+ cr_deps_entry.packages))
+ continue
+
+ if isinstance(cr_deps_entry, VersionEntry):
+ result.extend(
+ _FindChangedVars(path, libyuv_deps_entry.version,
+ cr_deps_entry.version))
+ continue
+
# Use the revision from Chromium's DEPS file.
new_rev = cr_deps_entry.revision
assert libyuv_deps_entry.url == cr_deps_entry.url, (
- 'Libyuv DEPS entry %s has a different URL (%s) than Chromium (%s).' %
- (path, libyuv_deps_entry.url, cr_deps_entry.url))
+ 'LibYUV DEPS entry %s has a different URL %s than Chromium %s.' %
+ (path, libyuv_deps_entry.url, cr_deps_entry.url))
else:
- # Use the HEAD of the deps repo.
- stdout, _ = _RunCommand(['git', 'ls-remote', libyuv_deps_entry.url,
- 'HEAD'])
- new_rev = stdout.strip().split('\t')[0]
+ if isinstance(libyuv_deps_entry, DepsEntry):
+ # Use the HEAD of the deps repo.
+ stdout, _ = _RunCommand(
+ ['git', 'ls-remote', libyuv_deps_entry.url, 'HEAD'])
+ new_rev = stdout.strip().split('\t')[0]
+ else:
+ # The dependency has been removed from chromium.
+ # This is handled by FindRemovedDeps.
+ continue
# Check if an update is necessary.
if libyuv_deps_entry.revision != new_rev:
logging.debug('Roll dependency %s to %s', path, new_rev)
- result.append(ChangedDep(path, libyuv_deps_entry.url,
- libyuv_deps_entry.revision, new_rev))
+ result.append(
+ ChangedDep(path, libyuv_deps_entry.url, libyuv_deps_entry.revision,
+ new_rev))
return sorted(result)
def CalculateChangedClang(new_cr_rev):
+
def GetClangRev(lines):
for line in lines:
match = CLANG_REVISION_RE.match(line)
if match:
return match.group(1)
- raise RollError('Could not parse Clang revision from:\n' + '\n'.join(' ' + l for l in lines))
+ raise RollError('Could not parse Clang revision!')
with open(CLANG_UPDATE_SCRIPT_LOCAL_PATH, 'r') as f:
current_lines = f.readlines()
current_rev = GetClangRev(current_lines)
new_clang_update_py = ReadRemoteCrFile(CLANG_UPDATE_SCRIPT_URL_PATH,
- new_cr_rev).splitlines()
+ new_cr_rev).splitlines()
new_rev = GetClangRev(new_clang_update_py)
return ChangedDep(CLANG_UPDATE_SCRIPT_LOCAL_PATH, None, current_rev, new_rev)
-def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos,
- new_commit_pos, changed_deps_list, clang_change):
- current_cr_rev = current_cr_rev[0:10]
- new_cr_rev = new_cr_rev[0:10]
+def GenerateCommitMessage(
+ rev_update,
+ current_commit_pos,
+ new_commit_pos,
+ changed_deps_list,
+ added_deps_paths=None,
+ removed_deps_paths=None,
+ clang_change=None,
+):
+ current_cr_rev = rev_update.current_chromium_rev[0:10]
+ new_cr_rev = rev_update.new_chromium_rev[0:10]
rev_interval = '%s..%s' % (current_cr_rev, new_cr_rev)
git_number_interval = '%s:%s' % (current_commit_pos, new_commit_pos)
- commit_msg = ['Roll chromium_revision %s (%s)\n' % (rev_interval,
- git_number_interval)]
- commit_msg.append('Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval))
- commit_msg.append('Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE %
- rev_interval))
+ commit_msg = [
+ 'Roll chromium_revision %s (%s)\n' % (rev_interval, git_number_interval),
+ 'Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval),
+ 'Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE % rev_interval)
+ ]
+
+ def Section(adjective, deps):
+ noun = 'dependency' if len(deps) == 1 else 'dependencies'
+ commit_msg.append('%s %s' % (adjective, noun))
+
if changed_deps_list:
- commit_msg.append('Changed dependencies:')
+ Section('Changed', changed_deps_list)
for c in changed_deps_list:
- commit_msg.append('* %s: %s/+log/%s..%s' % (c.path, c.url,
- c.current_rev[0:10],
- c.new_rev[0:10]))
+ if isinstance(c, ChangedCipdPackage):
+ commit_msg.append('* %s: %s..%s' %
+ (c.path, c.current_version, c.new_version))
+ elif isinstance(c, ChangedVersionEntry):
+        commit_msg.append('* %s_version: %s..%s' %
+ (c.path, c.current_version, c.new_version))
+ else:
+ commit_msg.append('* %s: %s/+log/%s..%s' %
+ (c.path, c.url, c.current_rev[0:10], c.new_rev[0:10]))
+
+ if added_deps_paths:
+ Section('Added', added_deps_paths)
+ commit_msg.extend('* %s' % p for p in added_deps_paths)
+
+ if removed_deps_paths:
+ Section('Removed', removed_deps_paths)
+ commit_msg.extend('* %s' % p for p in removed_deps_paths)
+
+ if any([changed_deps_list, added_deps_paths, removed_deps_paths]):
change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval, 'DEPS')
commit_msg.append('DEPS diff: %s\n' % change_url)
else:
commit_msg.append('No dependencies changed.')
- if clang_change.current_rev != clang_change.new_rev:
+ if clang_change and clang_change.current_rev != clang_change.new_rev:
commit_msg.append('Clang version changed %s:%s' %
(clang_change.current_rev, clang_change.new_rev))
change_url = CHROMIUM_FILE_TEMPLATE % (rev_interval,
@@ -320,38 +554,61 @@ def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos,
else:
commit_msg.append('No update to Clang.\n')
- # TBR needs to be non-empty for Gerrit to process it.
- git_author = _RunCommand(['git', 'config', 'user.email'],
- working_dir=CHECKOUT_SRC_DIR)[0].strip()
- commit_msg.append('TBR=%s' % git_author)
-
commit_msg.append('BUG=None')
return '\n'.join(commit_msg)
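For a typical roll the function now emits something along these lines (revisions, positions and paths below are made up; note the section headers no longer carry a colon):

  Roll chromium_revision 0123456789..9876543210 (1000001:1000042)

  Change log: https://chromium.googlesource.com/chromium/src/+log/0123456789..9876543210
  Full diff: https://chromium.googlesource.com/chromium/src/+/0123456789..9876543210

  Changed dependencies
  * src/build: https://chromium.googlesource.com/chromium/src/build.git/+log/aaaaaaaaaa..bbbbbbbbbb
  DEPS diff: https://chromium.googlesource.com/chromium/src/+/0123456789..9876543210/DEPS

  No update to Clang.

  BUG=None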
-def UpdateDepsFile(deps_filename, old_cr_revision, new_cr_revision,
- changed_deps):
+def UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content):
"""Update the DEPS file with the new revision."""
- # Update the chromium_revision variable.
with open(deps_filename, 'rb') as deps_file:
deps_content = deps_file.read().decode('utf-8')
- deps_content = deps_content.replace(old_cr_revision, new_cr_revision)
+
+ # Update the chromium_revision variable.
+ deps_content = deps_content.replace(rev_update.current_chromium_rev,
+ rev_update.new_chromium_rev)
+
+ # Add and remove dependencies. For now: only generated android deps.
+  # Since gclient cannot add or remove deps, we rely on the fact that
+ # these android deps are located in one place we can copy/paste.
+ deps_re = re.compile(ANDROID_DEPS_START + '.*' + ANDROID_DEPS_END, re.DOTALL)
+ new_deps = deps_re.search(new_cr_content)
+ old_deps = deps_re.search(deps_content)
+ if not new_deps or not old_deps:
+ faulty = 'Chromium' if not new_deps else 'LibYUV'
+ raise RollError('Was expecting to find "%s" and "%s"\n'
+ 'in %s DEPS' %
+ (ANDROID_DEPS_START, ANDROID_DEPS_END, faulty))
+ deps_content = deps_re.sub(new_deps.group(0), deps_content)
+
+ for dep in changed_deps:
+ if isinstance(dep, ChangedVersionEntry):
+ deps_content = deps_content.replace(dep.current_version, dep.new_version)
+
with open(deps_filename, 'wb') as deps_file:
deps_file.write(deps_content.encode('utf-8'))
# Update each individual DEPS entry.
for dep in changed_deps:
+    # ChangedVersionEntry types have already been processed.
+ if isinstance(dep, ChangedVersionEntry):
+ continue
local_dep_dir = os.path.join(CHECKOUT_ROOT_DIR, dep.path)
if not os.path.isdir(local_dep_dir):
raise RollError(
- 'Cannot find local directory %s. Make sure the .gclient file\n'
- 'contains all platforms in the target_os list, i.e.\n'
+ 'Cannot find local directory %s. Either run\n'
+ 'gclient sync --deps=all\n'
+ 'or make sure the .gclient file for your solution contains all '
+ 'platforms in the target_os list, i.e.\n'
'target_os = ["android", "unix", "mac", "ios", "win"];\n'
'Then run "gclient sync" again.' % local_dep_dir)
- _RunCommand(
- ['gclient', 'setdep', '--revision', '%s@%s' % (dep.path, dep.new_rev)],
- working_dir=CHECKOUT_SRC_DIR)
+ if isinstance(dep, ChangedCipdPackage):
+ package = dep.package.format() # Eliminate double curly brackets
+ update = '%s:%s@%s' % (dep.path, package, dep.new_version)
+ else:
+ update = '%s@%s' % (dep.path, dep.new_rev)
+ _RunCommand(['gclient', 'setdep', '--revision', update],
+ working_dir=CHECKOUT_SRC_DIR)
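The generated-android-deps block is swapped wholesale: both DEPS files must contain the ANDROID_DEPS_START/END marker comments, and because the regex is compiled with re.DOTALL, the '.*' between the markers spans newlines. A toy Python demonstration of the substitution:

  import re

  START = '=== ANDROID_DEPS Generated Code Start ==='
  END = '=== ANDROID_DEPS Generated Code End ==='
  deps_re = re.compile(START + '.*' + END, re.DOTALL)

  libyuv_deps = 'prefix\n' + START + '\nold deps\n' + END + '\nsuffix'
  chromium_block = START + '\nnew deps\n' + END
  print(deps_re.sub(chromium_block, libyuv_deps))
  # prefix / START / new deps / END / suffix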
def _IsTreeClean():
@@ -363,9 +620,9 @@ def _IsTreeClean():
return False
-def _EnsureUpdatedMasterBranch(dry_run):
- current_branch = _RunCommand(
- ['git', 'rev-parse', '--abbrev-ref', 'HEAD'])[0].splitlines()[0]
+def _EnsureUpdatedMainBranch(dry_run):
+ current_branch = _RunCommand(['git', 'rev-parse', '--abbrev-ref',
+ 'HEAD'])[0].splitlines()[0]
if current_branch != 'main':
logging.error('Please checkout the main branch and re-run this script.')
if not dry_run:
@@ -407,19 +664,34 @@ def ChooseCQMode(skip_cq, cq_over, current_commit_pos, new_commit_pos):
return 2
-def _UploadCL(commit_queue_mode):
+def _GetCcRecipients(changed_deps_list):
+  """Returns a list of emails to notify based on the changed deps list."""
+  del changed_deps_list  # Currently unused: no per-dependency CC mapping yet.
+  return []
+
+
+def _UploadCL(commit_queue_mode, add_cc=None):
"""Upload the committed changes as a changelist to Gerrit.
- commit_queue_mode:
- - 2: Submit to commit queue.
- - 1: Run trybots but do not submit to CQ.
- - 0: Skip CQ, upload only.
- """
- cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks', '--send-mail']
+ commit_queue_mode:
+ - 2: Submit to commit queue.
+ - 1: Run trybots but do not submit to CQ.
+ - 0: Skip CQ, upload only.
+
+ add_cc: A list of email addresses to add as CC recipients.
+ """
+ cc_recipients = []
+ if add_cc:
+ cc_recipients.extend(add_cc)
+ cmd = ['git', 'cl', 'upload', '--force', '--bypass-hooks']
if commit_queue_mode >= 2:
logging.info('Sending the CL to the CQ...')
cmd.extend(['-o', 'label=Bot-Commit+1'])
cmd.extend(['-o', 'label=Commit-Queue+2'])
+ cmd.extend(['--send-mail', '--cc', ','.join(cc_recipients)])
elif commit_queue_mode >= 1:
logging.info('Starting CQ dry run...')
cmd.extend(['-o', 'label=Commit-Queue+1'])
@@ -429,31 +701,57 @@ def _UploadCL(commit_queue_mode):
}
stdout, stderr = _RunCommand(cmd, extra_env=extra_env)
logging.debug('Output from "git cl upload":\nstdout:\n%s\n\nstderr:\n%s',
- stdout, stderr)
+ stdout, stderr)
+
+
+def GetRollRevisionRanges(opts, libyuv_deps):
+ current_cr_rev = libyuv_deps['vars']['chromium_revision']
+ new_cr_rev = opts.revision
+ if not new_cr_rev:
+ stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD'])
+ head_rev = stdout.strip().split('\t')[0]
+ logging.info('No revision specified. Using HEAD: %s', head_rev)
+ new_cr_rev = head_rev
+
+ return ChromiumRevisionUpdate(current_cr_rev, new_cr_rev)
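`git ls-remote <url> HEAD` prints the remote HEAD as '<sha>\tHEAD'; the script keeps the first tab-separated field. Illustrative parsing (sha made up):

  stdout = '0123456789abcdef0123456789abcdef01234567\tHEAD\n'
  head_rev = stdout.strip().split('\t')[0]
  print(head_rev)   # 0123456789abcdef0123456789abcdef01234567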
def main():
p = argparse.ArgumentParser()
- p.add_argument('--clean', action='store_true', default=False,
+ p.add_argument('--clean',
+ action='store_true',
+ default=False,
help='Removes any previous local roll branch.')
- p.add_argument('-r', '--revision',
+ p.add_argument('-r',
+ '--revision',
help=('Chromium Git revision to roll to. Defaults to the '
'Chromium HEAD revision if omitted.'))
- p.add_argument('--dry-run', action='store_true', default=False,
+ p.add_argument('--dry-run',
+ action='store_true',
+ default=False,
help=('Calculate changes and modify DEPS, but don\'t create '
'any local branch, commit, upload CL or send any '
'tryjobs.'))
- p.add_argument('-i', '--ignore-unclean-workdir', action='store_true',
+ p.add_argument('-i',
+ '--ignore-unclean-workdir',
+ action='store_true',
default=False,
help=('Ignore if the current branch is not main or if there '
'are uncommitted changes (default: %(default)s).'))
grp = p.add_mutually_exclusive_group()
- grp.add_argument('--skip-cq', action='store_true', default=False,
+ grp.add_argument('--skip-cq',
+ action='store_true',
+ default=False,
help='Skip sending the CL to the CQ (default: %(default)s)')
- grp.add_argument('--cq-over', type=int, default=1,
+ grp.add_argument('--cq-over',
+ type=int,
+ default=1,
help=('Commit queue dry run if the revision difference '
'is below this number (default: %(default)s)'))
- p.add_argument('-v', '--verbose', action='store_true', default=False,
+ p.add_argument('-v',
+ '--verbose',
+ action='store_true',
+ default=False,
help='Be extra verbose in printing of log messages.')
opts = p.parse_args()
@@ -470,38 +768,52 @@ def main():
_RemovePreviousRollBranch(opts.dry_run)
if not opts.ignore_unclean_workdir:
- _EnsureUpdatedMasterBranch(opts.dry_run)
-
- new_cr_rev = opts.revision
- if not new_cr_rev:
- stdout, _ = _RunCommand(['git', 'ls-remote', CHROMIUM_SRC_URL, 'HEAD'])
- head_rev = stdout.strip().split('\t')[0]
- logging.info('No revision specified. Using HEAD: %s', head_rev)
- new_cr_rev = head_rev
+ _EnsureUpdatedMainBranch(opts.dry_run)
deps_filename = os.path.join(CHECKOUT_SRC_DIR, 'DEPS')
libyuv_deps = ParseLocalDepsFile(deps_filename)
- current_cr_rev = libyuv_deps['vars']['chromium_revision']
- current_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(current_cr_rev))
- new_commit_pos = ParseCommitPosition(ReadRemoteCrCommit(new_cr_rev))
+ rev_update = GetRollRevisionRanges(opts, libyuv_deps)
- new_cr_deps = ParseRemoteCrDepsFile(new_cr_rev)
+ current_commit_pos = ParseCommitPosition(
+ ReadRemoteCrCommit(rev_update.current_chromium_rev))
+ new_commit_pos = ParseCommitPosition(
+ ReadRemoteCrCommit(rev_update.new_chromium_rev))
+
+ new_cr_content = ReadRemoteCrFile('DEPS', rev_update.new_chromium_rev)
+ new_cr_deps = ParseDepsDict(new_cr_content)
changed_deps = CalculateChangedDeps(libyuv_deps, new_cr_deps)
- clang_change = CalculateChangedClang(new_cr_rev)
- commit_msg = GenerateCommitMessage(current_cr_rev, new_cr_rev,
- current_commit_pos, new_commit_pos,
- changed_deps, clang_change)
+  # Discard other deps, assumed to be Chromium-only dependencies.
+ new_generated_android_deps, _ = FindAddedDeps(libyuv_deps, new_cr_deps)
+ removed_generated_android_deps, other_deps = FindRemovedDeps(
+ libyuv_deps, new_cr_deps)
+ if other_deps:
+ raise RollError('LibYUV DEPS entries are missing from Chromium: %s.\n'
+ 'Remove them or add them to either '
+ 'LIBYUV_ONLY_DEPS or DONT_AUTOROLL_THESE.' % other_deps)
+ clang_change = CalculateChangedClang(rev_update.new_chromium_rev)
+ commit_msg = GenerateCommitMessage(
+ rev_update,
+ current_commit_pos,
+ new_commit_pos,
+ changed_deps,
+ added_deps_paths=new_generated_android_deps,
+ removed_deps_paths=removed_generated_android_deps,
+ clang_change=clang_change)
logging.debug('Commit message:\n%s', commit_msg)
_CreateRollBranch(opts.dry_run)
- UpdateDepsFile(deps_filename, current_cr_rev, new_cr_rev, changed_deps)
- _LocalCommit(commit_msg, opts.dry_run)
- commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over,
- current_commit_pos, new_commit_pos)
- logging.info('Uploading CL...')
if not opts.dry_run:
- _UploadCL(commit_queue_mode)
+ UpdateDepsFile(deps_filename, rev_update, changed_deps, new_cr_content)
+ if _IsTreeClean():
+      logging.info('No DEPS changes detected, skipping CL creation.')
+ else:
+ _LocalCommit(commit_msg, opts.dry_run)
+ commit_queue_mode = ChooseCQMode(opts.skip_cq, opts.cq_over,
+ current_commit_pos, new_commit_pos)
+ logging.info('Uploading CL...')
+ if not opts.dry_run:
+ _UploadCL(commit_queue_mode, _GetCcRecipients(changed_deps))
return 0
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc
index 1f975825..1f1896b0 100644
--- a/files/unit_test/convert_test.cc
+++ b/files/unit_test/convert_test.cc
@@ -48,6 +48,7 @@ namespace libyuv {
#define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
+// Subsample amount uses a rounding-up divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
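SUBSAMPLE is ceiling division, so subsampled plane dimensions round up for odd sizes. In Python:

  def subsample(v, a):
    # Matches the C macro: (v + a - 1) / a.
    return (v + a - 1) // a

  print(subsample(1280, 2), subsample(1281, 2))   # 640 641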
// Planar test
@@ -180,9 +181,12 @@ TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOP(I010, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I210, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 10)
+TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 10)
TESTPLANARTOP(I410, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 10)
TESTPLANARTOP(I012, uint16_t, 2, 2, 2, I420, uint8_t, 1, 2, 2, 12)
+TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I212, uint16_t, 2, 2, 1, I422, uint8_t, 1, 2, 1, 12)
+TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I420, uint8_t, 1, 2, 2, 12)
TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
// Test Android 420 to I420
@@ -417,131 +421,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
-#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
- static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
- static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
- static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
- "SRC_SUBSAMP_X unsupported"); \
- static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
- "SRC_SUBSAMP_Y unsupported"); \
- static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
- "DST_SUBSAMP_X unsupported"); \
- static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
- "DST_SUBSAMP_Y unsupported"); \
- const int kWidth = W1280; \
- const int kHeight = benchmark_height_; \
- const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
- const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
- const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
- const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
- const int kPaddedHeight = \
- (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
- const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
- const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
- align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
- align_buffer_page_end( \
- src_uv, \
- 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
- align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_c, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
- align_buffer_page_end(dst_uv_opt, \
- 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
- SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
- for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
- src_y_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
- src_uv_p[i] = \
- (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
- } \
- memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
- memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
- MaskCpuFlags(disable_cpu_flags_); \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- MaskCpuFlags(benchmark_cpu_info_); \
- for (int i = 0; i < benchmark_iterations_; ++i) { \
- SRC_FMT_PLANAR##To##FMT_PLANAR( \
- src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
- DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
- reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
- NEG kHeight); \
- } \
- if (DOY) { \
- for (int i = 0; i < kHeight; ++i) { \
- for (int j = 0; j < kWidth; ++j) { \
- EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
- } \
- } \
- } \
- for (int i = 0; i < kDstHalfHeight; ++i) { \
- for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
- EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
- dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
- } \
- } \
- free_aligned_buffer_page_end(dst_y_c); \
- free_aligned_buffer_page_end(dst_uv_c); \
- free_aligned_buffer_page_end(dst_y_opt); \
- free_aligned_buffer_page_end(dst_uv_opt); \
- free_aligned_buffer_page_end(src_y); \
- free_aligned_buffer_page_end(src_uv); \
+#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \
+ TILE_WIDTH, TILE_HEIGHT) \
+ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
+ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
+ "SRC_SUBSAMP_X unsupported"); \
+ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
+ "SRC_SUBSAMP_Y unsupported"); \
+ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
+ "DST_SUBSAMP_X unsupported"); \
+ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
+ "DST_SUBSAMP_Y unsupported"); \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
+ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
+ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
+ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
+ const int kPaddedHeight = \
+ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
+ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
+ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
+ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
+ align_buffer_page_end( \
+ src_uv, \
+ 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_c, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
+ align_buffer_page_end(dst_uv_opt, \
+ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
+ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
+ for (int i = 0; \
+ i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_y_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \
+ SRC_BPC / (int)sizeof(SRC_T); \
+ ++i) { \
+ src_uv_p[i] = \
+ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
+ } \
+ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
+ memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ SRC_FMT_PLANAR##To##FMT_PLANAR( \
+ src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
+ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
+ DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
+ reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ if (DOY) { \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ } \
+ } \
+ } \
+ for (int i = 0; i < kDstHalfHeight; ++i) { \
+ for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
+ dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_y); \
+ free_aligned_buffer_page_end(src_uv); \
}
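Every variant generated by TESTBPTOBPI above runs the same check: convert once with SIMD masked off to produce a C reference, convert again with the benchmarked CPU features enabled, and require bit-exact output. A self-contained sketch of that pattern, with ABGRToARGB standing in for the conversion under test (it and MaskCpuFlags are libyuv public API; the flag values 1 and -1 match the defaults the test fixture passes):

#include <cassert>
#include <cstring>
#include <vector>

#include "libyuv/convert_argb.h"  // ABGRToARGB
#include "libyuv/cpu_id.h"        // MaskCpuFlags

// Run one conversion twice, portable C rows only and then full SIMD,
// and require bit-exact agreement, as the macros above do.
void CheckCVersusOpt(int width, int height) {
  std::vector<uint8_t> src(static_cast<size_t>(width) * height * 4, 0x5a);
  std::vector<uint8_t> dst_c(src.size());
  std::vector<uint8_t> dst_opt(src.size());
  libyuv::MaskCpuFlags(1);   // disable all SIMD paths (C reference)
  libyuv::ABGRToARGB(src.data(), width * 4, dst_c.data(), width * 4,
                     width, height);
  libyuv::MaskCpuFlags(-1);  // re-detect and enable all CPU features
  libyuv::ABGRToARGB(src.data(), width * 4, dst_opt.data(), width * 4,
                     width, height);
  assert(std::memcmp(dst_c.data(), dst_opt.data(), dst_c.size()) == 0);
}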
-#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
-TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
-TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
-TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
-
-#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT)
+
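Each TESTBPTOBP expansion covers five edge cases: _Any widens to benchmark_width_ + 1 so rows are not a SIMD multiple, _Unaligned offsets the source pointers by 2 bytes, _Invert negates the height, _Opt is the aligned fast path, and _NullY passes DOY = 0 so dst_y is NULL. The negated height is libyuv's convention for vertical inversion; a minimal sketch using NV21ToNV12, one of the conversions instantiated just below (assumes an even width so both planes share the stride):

#include "libyuv/convert.h"

// Convert NV21 to NV12 while flipping vertically: a negative height tells
// libyuv to read the source bottom-up, which is what the _Invert tests do.
void Nv21ToNv12Flipped(const uint8_t* src_y, const uint8_t* src_vu,
                       uint8_t* dst_y, uint8_t* dst_uv,
                       int width, int height) {
  libyuv::NV21ToNV12(src_y, width,     // Y plane, stride == width
                     src_vu, width,    // interleaved VU rows, same stride
                     dst_y, width,
                     dst_uv, width,
                     width, -height);  // negative height -> flip
}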
+TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
+TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
+TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
+TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
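The new MT2T entry passes the expression 10 / 8 as SRC_BPC instead of a computed value. Because the macro splices SRC_BPC textually into unparenthesized size expressions such as kPaddedWidth* kPaddedHeight* SRC_BPC, the division happens after the multiplications and yields the correct 1.25 bytes per pixel for this packed 10-bit tile format; evaluating 10 / 8 first would give 1 and undersize every buffer. A compile-time illustration (BUF_BYTES is hypothetical, mirroring the macro's arithmetic):

// SRC_BPC is pasted textually, so "10 / 8" divides after the multiply.
#define BUF_BYTES(W, H, SRC_BPC) ((W) * (H) * SRC_BPC)
static_assert(BUF_BYTES(16, 32, 10 / 8) == 640, "1.25 bytes per pixel");
static_assert(16 * 32 * (10 / 8) == 512, "pre-evaluated form undersizes");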
+
+#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@@ -621,30 +630,30 @@ TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
free_aligned_buffer_page_end(src_uv); \
}
-#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
- DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
- TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
- SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT) \
- TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
- SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
- DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
- TILE_WIDTH, TILE_HEIGHT)
-
-TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
-TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
+ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
+ DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \
+ TILE_HEIGHT) \
+ TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
+
+TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
+TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
+TESTBPTOP(P010, uint16_t, 2, 2, 2, I010, uint16_t, 2, 2, 2, 10, 1, 1)
+TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@@ -680,6 +689,12 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
kFilterBilinear)
+#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
+#define I422ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
+ I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
+ kFilterBilinear)
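Like the ARGB filter wrappers just above, the two new RGB24 defines pin the Matrix/Filter API to BT.601 constants and bilinear filtering so it fits the fixed argument list the TESTPLANARTOB harness expects. The same wrapper as a plain function, a sketch with an illustrative name:

#include "libyuv/convert_argb.h"  // I420ToRGB24MatrixFilter, kYuvI601Constants
#include "libyuv/scale.h"         // kFilterBilinear

// Pin the matrix and filter arguments so the call matches the generic
// (planes..., dst, stride, width, height) shape the test macros use.
int I420ToRGB24FilterFn(const uint8_t* src_y, int y_stride,
                        const uint8_t* src_u, int u_stride,
                        const uint8_t* src_v, int v_stride,
                        uint8_t* dst_rgb24, int rgb24_stride,
                        int width, int height) {
  return libyuv::I420ToRGB24MatrixFilter(
      src_y, y_stride, src_u, u_stride, src_v, v_stride, dst_rgb24,
      rgb24_stride, &libyuv::kYuvI601Constants, width, height,
      libyuv::kFilterBilinear);
}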
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
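ALIGNINT rounds V up to the next multiple of ALIGN by dividing then multiplying, so it is correct for any positive ALIGN; the (v + a - 1) & ~(a - 1) bitmask form used for tile padding earlier in the file requires a power-of-two alignment. Both at a glance:

static_assert(ALIGNINT(1281, 16) == 1296, "rounds up to a multiple");
static_assert(ALIGNINT(30, 3) == 30, "non-power-of-two ALIGN works");
static_assert(((1281 + 15) & ~15) == 1296, "bitmask form, pow2 only");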
@@ -792,8 +807,12 @@ TESTPLANARTOB(V422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(V422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
+TESTPLANARTOB(I422, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I422, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
+TESTPLANARTOB(I444, 1, 1, RGB24, 3, 3, 1)
+TESTPLANARTOB(I444, 1, 1, RAW, 3, 3, 1)
TESTPLANARTOB(J444, 1, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H444, 1, 1, ARGB, 4, 4, 1)
@@ -816,6 +835,8 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
#endif
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
+TESTPLANARTOB(I422, 2, 2, RGB24Filter, 3, 3, 1)
#else
TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
@@ -832,14 +853,15 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
-TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
+TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
+TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
#endif
@@ -1056,8 +1078,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#endif
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
- BPP_B, W1280, N, NEG, OFF) \
+#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@@ -1110,15 +1132,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Invert, -, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
- benchmark_width_, _Opt, +, 0)
+#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, _Opt, +, 0)
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@@ -1139,29 +1161,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
-TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
-TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
+TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
+TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
+TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
+TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
+TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
+TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@@ -1236,6 +1258,8 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
+TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
+TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
@@ -1254,8 +1278,84 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
-#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
- SUBSAMP_Y, W1280, N, NEG, OFF) \
+#define TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, \
+ SUBSAMP_Y, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
+ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+ align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
+ align_buffer_page_end(dst_a_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_c, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end(dst_a_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
+ align_buffer_page_end(dst_uv_opt, \
+ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_c, 1, kWidth* kHeight); \
+ memset(dst_y_c, 2, kWidth* kHeight); \
+ memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ memset(dst_a_opt, 101, kWidth* kHeight); \
+ memset(dst_y_opt, 102, kWidth* kHeight); \
+ memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ for (int i = 0; i < kHeight; ++i) \
+ for (int j = 0; j < kStride; ++j) \
+ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
+ kStrideUV * 2, dst_uv_c + kStrideUV, kStrideUV * 2, \
+ dst_a_c, kWidth, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_opt, kWidth, \
+ dst_uv_opt, kStrideUV * 2, dst_uv_opt + kStrideUV, \
+ kStrideUV * 2, dst_a_opt, kWidth, kWidth, \
+ NEG kHeight); \
+ } \
+ for (int i = 0; i < kHeight; ++i) { \
+ for (int j = 0; j < kWidth; ++j) { \
+ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
+ EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \
+ } \
+ } \
+ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
+ for (int j = 0; j < kStrideUV; ++j) { \
+ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
+ } \
+ } \
+ free_aligned_buffer_page_end(dst_a_c); \
+ free_aligned_buffer_page_end(dst_y_c); \
+ free_aligned_buffer_page_end(dst_uv_c); \
+ free_aligned_buffer_page_end(dst_a_opt); \
+ free_aligned_buffer_page_end(dst_y_opt); \
+ free_aligned_buffer_page_end(dst_uv_opt); \
+ free_aligned_buffer_page_end(src_argb); \
+ }
+
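A detail of the TESTATOPLANARAI harness above: both chroma planes live in one dst_uv buffer, with U and V each given a stride of kStrideUV * 2 and the V pointer offset by kStrideUV, so their rows interleave; the verification loop then walks 2 * SUBSAMPLE(kHeight, SUBSAMP_Y) rows of that single buffer. The layout, sketched for kStrideUV == 4:

#include <cstdint>

// One buffer, U and V rows interleaved:
//   bytes  0..3  -> U row 0     bytes  4..7  -> V row 0
//   bytes  8..11 -> U row 1     bytes 12..15 -> V row 1
constexpr int kStrideUV = 4;
uint8_t dst_uv[2 * kStrideUV * 2];          // two rows of U plus V
uint8_t* const dst_u = dst_uv;              // stride: 2 * kStrideUV
uint8_t* const dst_v = dst_uv + kStrideUV;  // stride: 2 * kStrideUV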
+#if defined(ENABLE_FULL_TESTS)
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#else
+#define TESTATOPLANARA(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOPLANARAI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+#endif
+
+TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
+
+#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@@ -1301,25 +1401,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
free_aligned_buffer_page_end(src_argb); \
}
-#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_ + 1, _Any, +, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Unaligned, +, 2) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Invert, -, 0) \
- TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
- benchmark_width_, _Opt, +, 0)
-
-TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
-TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
-TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
-TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
+#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_ + 1, _Any, +, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Unaligned, +, 2) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Invert, -, 0) \
+ TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+ benchmark_width_, _Opt, +, 0)
+
+TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
+TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
+TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
+TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
+TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
+TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@@ -1440,6 +1540,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
@@ -1450,7 +1551,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
#endif
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
-TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
+TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
@@ -1484,6 +1585,127 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// In-place tests: source and destination share the same buffer.
+#define TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
+ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
+ const int kWidth = W1280; \
+ const int kHeight = benchmark_height_; \
+ const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+ const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+ const int kStrideA = \
+ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+ const int kStrideB = \
+ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+ align_buffer_page_end(src_argb, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_c, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ align_buffer_page_end(dst_argb_opt, \
+ kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
+ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
+ src_argb[i + OFF] = (fastrand() & 0xff); \
+ } \
+ memcpy(dst_argb_c + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ MaskCpuFlags(disable_cpu_flags_); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_c /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_c, kStrideB, kWidth, NEG kHeight); \
+ MaskCpuFlags(benchmark_cpu_info_); \
+ for (int i = 0; i < benchmark_iterations_; ++i) { \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ } \
+ memcpy(dst_argb_opt + OFF, src_argb, \
+ kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
+ FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
+ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
+ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
+ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
+ } \
+ free_aligned_buffer_page_end(src_argb); \
+ free_aligned_buffer_page_end(dst_argb_c); \
+ free_aligned_buffer_page_end(dst_argb_opt); \
+ }
+
+#define TESTATOA(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
+ EPP_B, STRIDE_B, HEIGHT_B) \
+ TESTATOAI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
+ STRIDE_B, HEIGHT_B, benchmark_width_, _Inplace, +, 0)
+
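The _Inplace variants memcpy the random source into each destination buffer and then convert the buffer onto itself, re-running once after the benchmark loop so the compared result comes from a single in-place pass. This only works when the destination needs no more bytes per pixel than the source, which is why the bpp-growing conversions are commented out in the list below. A minimal sketch of the pattern (4-byte ARGB down to 3-byte RGB24, both libyuv API):

#include <vector>

#include "libyuv/convert_from_argb.h"  // ARGBToRGB24

// Convert ARGB to RGB24 in place: every output row is shorter than the
// input row it replaces, so data is consumed before it is overwritten.
void ArgbToRgb24InPlace(int width, int height) {
  std::vector<uint8_t> buf(static_cast<size_t>(width) * height * 4, 0x80);
  libyuv::ARGBToRGB24(buf.data(), width * 4,  // read buf as ARGB
                      buf.data(), width * 3,  // write buf as RGB24
                      width, height);
}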
+TESTATOA(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+TESTATOA(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
+// TODO(fbarchard): Support in place for mirror.
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
+TESTATOA(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
+#endif
+TESTATOA(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
+TESTATOA(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1)
+// TODO(fbarchard): Support in place for conversions that increase bpp.
+// TESTATOA(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
+// TESTATOA(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
+// TESTATOA(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+// TESTATOA(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
+TESTATOA(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
+#ifdef LITTLE_ENDIAN_ONLY_TEST
+// TESTATOA(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
+#endif
+TESTATOA(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+// TESTATOA(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+// TESTATOA(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
+TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
+TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
+
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \
@@ -2065,6 +2287,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_u, half_width * half_height);
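The clamp added to every MJPG test guards the area-scaled iteration count: benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height) is integer division, so when the benchmark area is smaller than the decoded image area it truncates to zero and the conversion under test would never run. A worked example with illustrative dimensions:

// e.g. benchmark 640x360 against a 1280x720 source image:
int iters = 1 * 640 * 360 / (1280 * 720);  // 230400 / 921600 == 0
if (iters < 1) {
  iters = 1;  // guarantee at least one conversion, as the patch does
}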
@@ -2099,6 +2324,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
// Convert to NV21
align_buffer_page_end(dst_y, width * height);
@@ -2158,6 +2386,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
// Convert to NV12
align_buffer_page_end(dst_y, width * height);
@@ -2217,6 +2448,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2247,6 +2481,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2282,6 +2519,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2312,6 +2552,9 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2346,6 +2589,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2376,6 +2622,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2410,6 +2659,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2440,6 +2692,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) {
int half_height = (height + 1) / 2;
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_y, width * height);
align_buffer_page_end(dst_uv, half_width * half_height * 2);
@@ -2472,6 +2727,9 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) {
int benchmark_iterations = benchmark_iterations_ * benchmark_width_ *
benchmark_height_ / (width * height);
+ if (benchmark_iterations < 1) {
+ benchmark_iterations = 1;
+ }
align_buffer_page_end(dst_argb, width * height * 4);
for (int times = 0; times < benchmark_iterations; ++times) {
@@ -2921,6 +3179,51 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
+TEST_F(LibYUVConvertTest, MM21ToYUY2) {
+ const int kWidth = (benchmark_width_ + 15) & (~15);
+ const int kHeight = (benchmark_height_ + 31) & (~31);
+
+ align_buffer_page_end(orig_y, kWidth * kHeight);
+ align_buffer_page_end(orig_uv,
+ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(tmp_y, kWidth * kHeight);
+ align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+ align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+ align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
+
+ MemRandomize(orig_y, kWidth * kHeight);
+ MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
+
+ /* Convert MM21 to YUY2 in 2 steps for reference */
+ libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
+ kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
+ SUBSAMPLE(kWidth, 2), golden_yuyv,
+ 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+
+  /* Convert MM21 to YUY2 directly */
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+ libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
+ dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
+ }
+
+ for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
+ EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
+ }
+
+ free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tmp_y);
+ free_aligned_buffer_page_end(tmp_u);
+ free_aligned_buffer_page_end(tmp_v);
+ free_aligned_buffer_page_end(dst_yuyv);
+ free_aligned_buffer_page_end(golden_yuyv);
+}
+
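A note on the buffer sizing in MM21ToYUY2 above: YUY2 packs two pixels into four bytes, so with kWidth already rounded up to a multiple of 16 the row size 4 * SUBSAMPLE(kWidth, 2) equals 2 * kWidth. A compile-time check (SUBSAMPLE restated here for self-containment; it rounds up):

#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
static_assert(4 * SUBSAMPLE(1280, 2) == 2 * 1280, "YUY2: 2 bytes per pixel");
static_assert(SUBSAMPLE(1281, 2) == 641, "SUBSAMPLE rounds up");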
// Transitive test. A to B to C is same as A to C.
// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \
@@ -3353,6 +3656,8 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
I012ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I012ToAR30(a, b, c, d, e, f, g, h, i, j) \
I012ToAR30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
+#define I012ToAB30(a, b, c, d, e, f, g, h, i, j) \
+ I012ToAB30Matrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
#define I410ToARGB(a, b, c, d, e, f, g, h, i, j) \
I410ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j)
@@ -3495,6 +3800,7 @@ TESTPLANAR16TOB(H410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AR30, 4, 4, 1)
TESTPLANAR16TOB(U410, 1, 1, 0x3ff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I012, 2, 2, 0xfff, AR30, 4, 4, 1)
+TESTPLANAR16TOB(I012, 2, 2, 0xfff, AB30, 4, 4, 1)
TESTPLANAR16TOB(I010, 2, 2, 0x3ff, AR30Filter, 4, 4, 1)
TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
#endif // LITTLE_ENDIAN_ONLY_TEST
@@ -3733,8 +4039,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
-#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
+#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@@ -3777,16 +4083,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
-#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- ALIGN, YALIGN, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
- TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
- YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
+#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+ YALIGN, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
+ TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
+ benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@@ -3829,23 +4135,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
-TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
-TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
-TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
-TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
+TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
+TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
+TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
+TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS
diff --git a/files/unit_test/cpu_test.cc b/files/unit_test/cpu_test.cc
index 080778f5..93867fa7 100644
--- a/files/unit_test/cpu_test.cc
+++ b/files/unit_test/cpu_test.cc
@@ -20,13 +20,23 @@ namespace libyuv {
TEST_F(LibYUVBaseTest, TestCpuHas) {
int cpu_flags = TestCpuFlag(-1);
- printf("Cpu Flags %d\n", cpu_flags);
+ printf("Cpu Flags 0x%x\n", cpu_flags);
#if defined(__arm__) || defined(__aarch64__)
int has_arm = TestCpuFlag(kCpuHasARM);
- printf("Has ARM %d\n", has_arm);
+ printf("Has ARM 0x%x\n", has_arm);
int has_neon = TestCpuFlag(kCpuHasNEON);
- printf("Has NEON %d\n", has_neon);
+ printf("Has NEON 0x%x\n", has_neon);
#endif
+#if defined(__riscv) && defined(__linux__)
+ int has_riscv = TestCpuFlag(kCpuHasRISCV);
+ printf("Has RISCV 0x%x\n", has_riscv);
+ int has_rvv = TestCpuFlag(kCpuHasRVV);
+ printf("Has RVV 0x%x\n", has_rvv);
+ int has_rvvzvfh = TestCpuFlag(kCpuHasRVVZVFH);
+ printf("Has RVVZVFH 0x%x\n", has_rvvzvfh);
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
+ defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86);
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@@ -45,39 +55,38 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
- printf("Has X86 %d\n", has_x86);
- printf("Has SSE2 %d\n", has_sse2);
- printf("Has SSSE3 %d\n", has_ssse3);
- printf("Has SSE41 %d\n", has_sse41);
- printf("Has SSE42 %d\n", has_sse42);
- printf("Has AVX %d\n", has_avx);
- printf("Has AVX2 %d\n", has_avx2);
- printf("Has ERMS %d\n", has_erms);
- printf("Has FMA3 %d\n", has_fma3);
- printf("Has F16C %d\n", has_f16c);
- printf("Has GFNI %d\n", has_gfni);
- printf("Has AVX512BW %d\n", has_avx512bw);
- printf("Has AVX512VL %d\n", has_avx512vl);
- printf("Has AVX512VNNI %d\n", has_avx512vnni);
- printf("Has AVX512VBMI %d\n", has_avx512vbmi);
- printf("Has AVX512VBMI2 %d\n", has_avx512vbmi2);
- printf("Has AVX512VBITALG %d\n", has_avx512vbitalg);
- printf("Has AVX512VPOPCNTDQ %d\n", has_avx512vpopcntdq);
-
+ printf("Has X86 0x%x\n", has_x86);
+ printf("Has SSE2 0x%x\n", has_sse2);
+ printf("Has SSSE3 0x%x\n", has_ssse3);
+ printf("Has SSE41 0x%x\n", has_sse41);
+ printf("Has SSE42 0x%x\n", has_sse42);
+ printf("Has AVX 0x%x\n", has_avx);
+ printf("Has AVX2 0x%x\n", has_avx2);
+ printf("Has ERMS 0x%x\n", has_erms);
+ printf("Has FMA3 0x%x\n", has_fma3);
+ printf("Has F16C 0x%x\n", has_f16c);
+ printf("Has GFNI 0x%x\n", has_gfni);
+ printf("Has AVX512BW 0x%x\n", has_avx512bw);
+ printf("Has AVX512VL 0x%x\n", has_avx512vl);
+ printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
+ printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
+ printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
+#endif
#if defined(__mips__)
int has_mips = TestCpuFlag(kCpuHasMIPS);
- printf("Has MIPS %d\n", has_mips);
+ printf("Has MIPS 0x%x\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
- printf("Has MSA %d\n", has_msa);
+ printf("Has MSA 0x%x\n", has_msa);
#endif
-
#if defined(__loongarch__)
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
- printf("Has LOONGARCH %d\n", has_loongarch);
+ printf("Has LOONGARCH 0x%x\n", has_loongarch);
int has_lsx = TestCpuFlag(kCpuHasLSX);
- printf("Has LSX %d\n", has_lsx);
+ printf("Has LSX 0x%x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
- printf("Has LASX %d\n", has_lasx);
+ printf("Has LASX 0x%x\n", has_lasx);
#endif
}
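The switch from %d to 0x%x throughout this test matches what TestCpuFlag actually returns: the requested bit of the detected-features mask, not a 0/1 boolean, so hex output makes each feature's bit position visible. Typical call-site usage with the cpu_id.h names:

#include "libyuv/cpu_id.h"

// TestCpuFlag returns the matching bit (non-zero) when the feature is
// present; treat the result as a mask, not as exactly 1.
bool HasNeon() {
  return libyuv::TestCpuFlag(libyuv::kCpuHasNEON) != 0;
}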
@@ -104,27 +113,33 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __i386__
printf("__i386__ %d\n", __i386__);
#endif
-#ifdef __mips
- printf("__mips %d\n", __mips);
-#endif
-#ifdef __mips_isa_rev
- printf("__mips_isa_rev %d\n", __mips_isa_rev);
-#endif
#ifdef __x86_64__
printf("__x86_64__ %d\n", __x86_64__);
#endif
+#ifdef _M_IX86
+ printf("_M_IX86 %d\n", _M_IX86);
+#endif
+#ifdef _M_X64
+ printf("_M_X64 %d\n", _M_X64);
+#endif
#ifdef _MSC_VER
printf("_MSC_VER %d\n", _MSC_VER);
#endif
#ifdef __aarch64__
printf("__aarch64__ %d\n", __aarch64__);
#endif
-#ifdef __APPLE__
- printf("__APPLE__ %d\n", __APPLE__);
-#endif
#ifdef __arm__
printf("__arm__ %d\n", __arm__);
#endif
+#ifdef __riscv
+ printf("__riscv %d\n", __riscv);
+#endif
+#ifdef __riscv_vector
+ printf("__riscv_vector %d\n", __riscv_vector);
+#endif
+#ifdef __APPLE__
+ printf("__APPLE__ %d\n", __APPLE__);
+#endif
#ifdef __clang__
printf("__clang__ %d\n", __clang__);
#endif
@@ -140,20 +155,11 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef __mips_msa
printf("__mips_msa %d\n", __mips_msa);
#endif
-#ifdef __native_client__
- printf("__native_client__ %d\n", __native_client__);
-#endif
-#ifdef __pic__
- printf("__pic__ %d\n", __pic__);
-#endif
-#ifdef __pnacl__
- printf("__pnacl__ %d\n", __pnacl__);
-#endif
-#ifdef _M_IX86
- printf("_M_IX86 %d\n", _M_IX86);
+#ifdef __mips
+ printf("__mips %d\n", __mips);
#endif
-#ifdef _M_X64
- printf("_M_X64 %d\n", _M_X64);
+#ifdef __mips_isa_rev
+ printf("__mips_isa_rev %d\n", __mips_isa_rev);
#endif
#ifdef _MIPS_ARCH_LOONGSON3A
printf("_MIPS_ARCH_LOONGSON3A %d\n", _MIPS_ARCH_LOONGSON3A);
@@ -164,6 +170,15 @@ TEST_F(LibYUVBaseTest, TestCompilerMacros) {
#ifdef _WIN32
printf("_WIN32 %d\n", _WIN32);
#endif
+#ifdef __native_client__
+ printf("__native_client__ %d\n", __native_client__);
+#endif
+#ifdef __pic__
+ printf("__pic__ %d\n", __pic__);
+#endif
+#ifdef __pnacl__
+ printf("__pnacl__ %d\n", __pnacl__);
+#endif
#ifdef GG_LONGLONG
printf("GG_LONGLONG %d\n", GG_LONGLONG);
#endif
@@ -200,8 +215,9 @@ TEST_F(LibYUVBaseTest, TestCpuId) {
cpu_info[0] = cpu_info[1]; // Reorder output
cpu_info[1] = cpu_info[3];
cpu_info[3] = 0;
- printf("Cpu Vendor: %s %x %x %x\n", reinterpret_cast<char*>(&cpu_info[0]),
- cpu_info[0], cpu_info[1], cpu_info[2]);
+ printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
+ reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
+ cpu_info[2]);
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
// CPU Family and Model
@@ -264,6 +280,32 @@ TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
}
}
+TEST_F(LibYUVBaseTest, TestLinuxRVV) {
+ if (FileExists("../../unit_test/testdata/riscv64.txt")) {
+ printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n");
+
+ EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt"));
+ EXPECT_EQ(kCpuHasRVV,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt"));
+ EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH,
+ RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt"));
+ } else {
+ printf(
+ "WARNING: unable to load "
+ "\"../../unit_test/testdata/riscv64.txt\"\n");
+ }
+#if defined(__linux__) && defined(__riscv)
+ if (FileExists("/proc/cpuinfo")) {
+ if (!(kCpuHasRVV & RiscvCpuCaps("/proc/cpuinfo"))) {
+      // This can happen under an RVV emulator when /proc/cpuinfo comes
+      // from the host.
+ printf("WARNING: RVV build enabled but CPU does not have RVV\n");
+ }
+ } else {
+ printf("WARNING: unable to load \"/proc/cpuinfo\"\n");
+ }
+#endif
+}
+
// TODO(fbarchard): Fix clangcl test of cpuflags.
#ifdef _MSC_VER
TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) {
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc
index 3a8c470b..ad97b87e 100644
--- a/files/unit_test/planar_test.cc
+++ b/files/unit_test/planar_test.cc
@@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int y_plane_size = benchmark_width_ * benchmark_height_;
- align_buffer_page_end(orig_y, orig_plane_size);
+ align_buffer_page_end(tile_y, tile_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
- MemRandomize(orig_y, orig_plane_size);
+ MemRandomize(tile_y, tile_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+ DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
@@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
- free_aligned_buffer_page_end(orig_y);
+ free_aligned_buffer_page_end(tile_y);
+ free_aligned_buffer_page_end(dst_c);
+ free_aligned_buffer_page_end(dst_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
+ int i, j;
+
+ // orig is tiled. Allocate enough memory for tiles.
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height * 2;
+ int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
+ align_buffer_page_end(tile_y, tile_plane_size);
+ align_buffer_page_end(dst_c, y_plane_size);
+ align_buffer_page_end(dst_opt, y_plane_size);
+
+ MemRandomize(tile_y, tile_plane_size);
+ memset(dst_c, 0, y_plane_size);
+ memset(dst_opt, 0, y_plane_size);
+
+ // Disable all optimizations.
+ MaskCpuFlags(disable_cpu_flags_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ // Enable optimizations.
+ MaskCpuFlags(benchmark_cpu_info_);
+ for (j = 0; j < benchmark_iterations_; j++) {
+ DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
+ benchmark_width_, benchmark_width_, benchmark_height_, 16);
+ }
+
+ for (i = 0; i < y_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(tile_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
}
@@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
- align_buffer_page_end(detiled_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
+ align_buffer_page_end(detiled_uv, tile_plane_size);
align_buffer_page_end(dst_u_two_stage, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_two_stage, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
- memset(detiled_uv, 0, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
+ memset(detiled_uv, 0, tile_plane_size);
memset(dst_u_two_stage, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_two_stage, 0, uv_plane_size);
memset(dst_v_opt, 0, uv_plane_size);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
// Benchmark 2 step conversion for comparison.
for (j = 0; j < benchmark_iterations_; j++) {
- DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+ DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
- SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+ SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
(benchmark_width_ + 1) / 2, dst_v_two_stage,
(benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
benchmark_height_);
@@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(detiled_uv);
free_aligned_buffer_page_end(dst_u_two_stage);
free_aligned_buffer_page_end(dst_u_opt);
@@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
int i, j;
// orig is tiled. Allocate enough memory for tiles.
- int orig_width = (benchmark_width_ + 15) & ~15;
- int orig_height = (benchmark_height_ + 15) & ~15;
- int orig_plane_size = orig_width * orig_height;
+ int tile_width = (benchmark_width_ + 15) & ~15;
+ int tile_height = (benchmark_height_ + 15) & ~15;
+ int tile_plane_size = tile_width * tile_height;
int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
- align_buffer_page_end(orig_uv, orig_plane_size);
+ align_buffer_page_end(tile_uv, tile_plane_size);
align_buffer_page_end(dst_u_c, uv_plane_size);
align_buffer_page_end(dst_u_opt, uv_plane_size);
align_buffer_page_end(dst_v_c, uv_plane_size);
align_buffer_page_end(dst_v_opt, uv_plane_size);
- MemRandomize(orig_uv, orig_plane_size);
+ MemRandomize(tile_uv, tile_plane_size);
memset(dst_u_c, 0, uv_plane_size);
memset(dst_u_opt, 0, uv_plane_size);
memset(dst_v_c, 0, uv_plane_size);
@@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
- DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+ DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
benchmark_height_, 16);
@@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
for (j = 0; j < benchmark_iterations_; j++) {
DetileSplitUVPlane(
- orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+ tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
(benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
}
@@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
}
- free_aligned_buffer_page_end(orig_uv);
+ free_aligned_buffer_page_end(tile_uv);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_c);
@@ -3495,8 +3534,8 @@ TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
- // Round count up to multiple of 16
- const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+ // Round count up to multiple of 8
+ const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
diff --git a/files/unit_test/rotate_argb_test.cc b/files/unit_test/rotate_argb_test.cc
index 01ed69ca..74952c4e 100644
--- a/files/unit_test/rotate_argb_test.cc
+++ b/files/unit_test/rotate_argb_test.cc
@@ -225,4 +225,110 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
free_aligned_buffer_page_end(src_argb);
}
+static void TestRotatePlane_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height < 1) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_stride = src_width;
+ int src_plane_size = src_stride * abs(src_height);
+ align_buffer_page_end_16(src, src_plane_size);
+ for (int i = 0; i < src_plane_size; ++i) {
+ src[i] = fastrand() & 0xff;
+ }
+
+ int dst_stride = dst_width;
+ int dst_plane_size = dst_stride * dst_height;
+ align_buffer_page_end_16(dst_c, dst_plane_size);
+ align_buffer_page_end_16(dst_opt, dst_plane_size);
+ memset(dst_c, 2, dst_plane_size);
+ memset(dst_opt, 3, dst_plane_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ RotatePlane_16(src, src_stride, dst_c, dst_stride, src_width, src_height,
+ mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ RotatePlane_16(src, src_stride, dst_opt, dst_stride, src_width, src_height,
+ mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_plane_size; ++i) {
+ EXPECT_EQ(dst_c[i], dst_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_c);
+ free_aligned_buffer_page_end_16(dst_opt);
+ free_aligned_buffer_page_end_16(src);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Opt) {
+ TestRotatePlane_16(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane0_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate0,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane90_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate90,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane180_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_width_ + 1, benchmark_height_ + 1, kRotate180,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, RotatePlane270_16_Odd) {
+ TestRotatePlane_16(benchmark_width_ + 1, benchmark_height_ + 1,
+ benchmark_height_ + 1, benchmark_width_ + 1, kRotate270,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
} // namespace libyuv
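[Editor's note: the _Odd variants pass benchmark size + 1 so the row functions cannot cover the whole image with full vector registers and must take a remainder path. A rough sketch of the body-plus-tail pattern such tests exercise; the function names here are hypothetical, not libyuv's actual dispatch:

    // Process the vector-sized body, then finish the remainder in C.
    void RotateRowAny(const uint16_t* src, uint16_t* dst, int width) {
      int vec_width = width & ~7;  // largest multiple of 8 lanes
      if (vec_width > 0) {
        RotateRow_SIMD(src, dst, vec_width);  // hypothetical SIMD path
      }
      if (width > vec_width) {
        RotateRow_C(src + vec_width, dst + vec_width, width - vec_width);
      }
    }
]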
diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc
index d3887414..abc08efa 100644
--- a/files/unit_test/rotate_test.cc
+++ b/files/unit_test/rotate_test.cc
@@ -14,6 +14,10 @@
#include "libyuv/cpu_id.h"
#include "libyuv/rotate.h"
+#ifdef ENABLE_ROW_TESTS
+#include "libyuv/rotate_row.h"
+#endif
+
namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
@@ -596,4 +600,363 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
#undef TESTAPLANARTOP
#undef TESTAPLANARTOPI
+static void I010TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i010_y_size = src_width * Abs(src_height);
+ int src_i010_uv_size = ((src_width + 1) / 2) * ((Abs(src_height) + 1) / 2);
+ int src_i010_size = src_i010_y_size + src_i010_uv_size * 2;
+ align_buffer_page_end_16(src_i010, src_i010_size);
+ for (int i = 0; i < src_i010_size; ++i) {
+ src_i010[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i010_y_size = dst_width * dst_height;
+ int dst_i010_uv_size = ((dst_width + 1) / 2) * ((dst_height + 1) / 2);
+ int dst_i010_size = dst_i010_y_size + dst_i010_uv_size * 2;
+ align_buffer_page_end_16(dst_i010_c, dst_i010_size);
+ align_buffer_page_end_16(dst_i010_opt, dst_i010_size);
+ memset(dst_i010_c, 2, dst_i010_size * 2);
+ memset(dst_i010_opt, 3, dst_i010_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I010Rotate(src_i010, src_width, src_i010 + src_i010_y_size,
+ (src_width + 1) / 2, src_i010 + src_i010_y_size + src_i010_uv_size,
+ (src_width + 1) / 2, dst_i010_c, dst_width,
+ dst_i010_c + dst_i010_y_size, (dst_width + 1) / 2,
+ dst_i010_c + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I010Rotate(
+ src_i010, src_width, src_i010 + src_i010_y_size, (src_width + 1) / 2,
+ src_i010 + src_i010_y_size + src_i010_uv_size, (src_width + 1) / 2,
+ dst_i010_opt, dst_width, dst_i010_opt + dst_i010_y_size,
+ (dst_width + 1) / 2, dst_i010_opt + dst_i010_y_size + dst_i010_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i010_size; ++i) {
+ EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i010_c);
+ free_aligned_buffer_page_end_16(dst_i010_opt);
+ free_aligned_buffer_page_end_16(src_i010);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate0_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate90_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate180_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I010Rotate270_Opt) {
+ I010TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I210TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i210_y_size = src_width * Abs(src_height);
+ int src_i210_uv_size = ((src_width + 1) / 2) * Abs(src_height);
+ int src_i210_size = src_i210_y_size + src_i210_uv_size * 2;
+ align_buffer_page_end_16(src_i210, src_i210_size);
+ for (int i = 0; i < src_i210_size; ++i) {
+ src_i210[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i210_y_size = dst_width * dst_height;
+ int dst_i210_uv_size = ((dst_width + 1) / 2) * dst_height;
+ int dst_i210_size = dst_i210_y_size + dst_i210_uv_size * 2;
+ align_buffer_page_end_16(dst_i210_c, dst_i210_size);
+ align_buffer_page_end_16(dst_i210_opt, dst_i210_size);
+ memset(dst_i210_c, 2, dst_i210_size * 2);
+ memset(dst_i210_opt, 3, dst_i210_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I210Rotate(src_i210, src_width, src_i210 + src_i210_y_size,
+ (src_width + 1) / 2, src_i210 + src_i210_y_size + src_i210_uv_size,
+ (src_width + 1) / 2, dst_i210_c, dst_width,
+ dst_i210_c + dst_i210_y_size, (dst_width + 1) / 2,
+ dst_i210_c + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I210Rotate(
+ src_i210, src_width, src_i210 + src_i210_y_size, (src_width + 1) / 2,
+ src_i210 + src_i210_y_size + src_i210_uv_size, (src_width + 1) / 2,
+ dst_i210_opt, dst_width, dst_i210_opt + dst_i210_y_size,
+ (dst_width + 1) / 2, dst_i210_opt + dst_i210_y_size + dst_i210_uv_size,
+ (dst_width + 1) / 2, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i210_size; ++i) {
+ EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i210_c);
+ free_aligned_buffer_page_end_16(dst_i210_opt);
+ free_aligned_buffer_page_end_16(src_i210);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate0_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate90_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate180_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I210Rotate270_Opt) {
+ I210TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+static void I410TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i410_y_size = src_width * Abs(src_height);
+ int src_i410_uv_size = src_width * Abs(src_height);
+ int src_i410_size = src_i410_y_size + src_i410_uv_size * 2;
+ align_buffer_page_end_16(src_i410, src_i410_size);
+ for (int i = 0; i < src_i410_size; ++i) {
+ src_i410[i] = fastrand() & 0x3ff;
+ }
+
+ int dst_i410_y_size = dst_width * dst_height;
+ int dst_i410_uv_size = dst_width * dst_height;
+ int dst_i410_size = dst_i410_y_size + dst_i410_uv_size * 2;
+ align_buffer_page_end_16(dst_i410_c, dst_i410_size);
+ align_buffer_page_end_16(dst_i410_opt, dst_i410_size);
+ memset(dst_i410_c, 2, dst_i410_size * 2);
+ memset(dst_i410_opt, 3, dst_i410_size * 2);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_c, dst_width, dst_i410_c + dst_i410_y_size, dst_width,
+ dst_i410_c + dst_i410_y_size + dst_i410_uv_size, dst_width,
+ src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I410Rotate(src_i410, src_width, src_i410 + src_i410_y_size, src_width,
+ src_i410 + src_i410_y_size + src_i410_uv_size, src_width,
+ dst_i410_opt, dst_width, dst_i410_opt + dst_i410_y_size,
+ dst_width, dst_i410_opt + dst_i410_y_size + dst_i410_uv_size,
+ dst_width, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i410_size; ++i) {
+ EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
+ }
+
+ free_aligned_buffer_page_end_16(dst_i410_c);
+ free_aligned_buffer_page_end_16(dst_i410_opt);
+ free_aligned_buffer_page_end_16(src_i410);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate0_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate90_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate180_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
+ I410TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
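[Editor's note: the three rotate helpers above differ only in chroma geometry. For a 10-bit 1920x1080 frame, the per-plane sample counts (each sample a uint16_t) work out as:

    // I010 (4:2:0): ((w+1)/2) * ((h+1)/2) = 960 * 540   =  518400 per U/V plane
    // I210 (4:2:2): ((w+1)/2) * h         = 960 * 1080  = 1036800 per U/V plane
    // I410 (4:4:4): w * h                 = 1920 * 1080 = 2073600 per U/V plane
]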
+#if defined(ENABLE_ROW_TESTS)
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
+ // dst width and height
+ const int width = 4;
+ const int height = 4;
+ int src_pixels[4][4];
+ int dst_pixels_c[4][4];
+ int dst_pixels_opt[4][4];
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ src_pixels[i][j] = i * 10 + j;
+ }
+ }
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ const int benchmark_iterations =
+ (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /
+ (4 * 4);
+ for (int i = 0; i < benchmark_iterations; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
+ EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
+ }
+ }
+}
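[Editor's note: the operation under test is a plain 4x4 transpose of 32-bit elements. A scalar sketch for reference; the shipped Transpose4x4_32_C additionally takes byte strides, as the calls above show:

    void Transpose4x4_32(const int src[4][4], int dst[4][4]) {
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          dst[i][j] = src[j][i];  // row i of dst = column i of src
        }
      }
    }
]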
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
+ // dst width and height
+ const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
+ const int height = 4;
+ align_buffer_page_end(src_pixels, height * width * 4);
+ align_buffer_page_end(dst_pixels_c, width * height * 4);
+ align_buffer_page_end(dst_pixels_opt, width * height * 4);
+
+ MemRandomize(src_pixels, height * width * 4);
+ memset(dst_pixels_c, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_c, width * 4, width);
+
+ for (int i = 0; i < benchmark_iterations_; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels);
+ free_aligned_buffer_page_end(dst_pixels_c);
+ free_aligned_buffer_page_end(dst_pixels_opt);
+}
+
+#endif // ENABLE_ROW_TESTS
+
} // namespace libyuv
diff --git a/files/unit_test/scale_uv_test.cc b/files/unit_test/scale_uv_test.cc
index 3d524bef..dab217c9 100644
--- a/files/unit_test/scale_uv_test.cc
+++ b/files/unit_test/scale_uv_test.cc
@@ -39,55 +39,35 @@ static int UVTestFilter(int src_width,
return 0;
}
- int i, j;
- const int b = 0; // 128 to test for padding/stride.
- int64_t src_uv_plane_size =
- (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 2LL;
- int src_stride_uv = (b * 2 + Abs(src_width)) * 2;
+ int i;
+ int64_t src_uv_plane_size = Abs(src_width) * Abs(src_height) * 2LL;
+ int src_stride_uv = Abs(src_width) * 2;
+ int64_t dst_uv_plane_size = dst_width * dst_height * 2LL;
+ int dst_stride_uv = dst_width * 2;
align_buffer_page_end(src_uv, src_uv_plane_size);
- if (!src_uv) {
- printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
- return 0;
- }
- MemRandomize(src_uv, src_uv_plane_size);
-
- int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
- int dst_stride_uv = (b * 2 + dst_width) * 2;
-
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
- if (!dst_uv_c || !dst_uv_opt) {
+
+ if (!src_uv || !dst_uv_c || !dst_uv_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
+ MemRandomize(src_uv, src_uv_plane_size);
memset(dst_uv_c, 2, dst_uv_plane_size);
- memset(dst_uv_opt, 3, dst_uv_plane_size);
-
- // Warm up both versions for consistent benchmarks.
- MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
- MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ memset(dst_uv_opt, 123, dst_uv_plane_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_c, dst_stride_uv,
dst_width, dst_height, f);
-
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
- UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
- src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
- dst_width, dst_height, f);
+ UVScale(src_uv, src_stride_uv, src_width, src_height, dst_uv_opt,
+ dst_stride_uv, dst_width, dst_height, f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
@@ -95,18 +75,11 @@ static int UVTestFilter(int src_width,
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
- // C version may be a little off from the optimized. Order of
- // operations may introduce rounding somewhere. So do a difference
- // of the buffers and look to see that the max difference isn't
- // over 2.
int max_diff = 0;
- for (i = b; i < (dst_height + b); ++i) {
- for (j = b * 2; j < (dst_width + b) * 2; ++j) {
- int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
- dst_uv_opt[(i * dst_stride_uv) + j]);
- if (abs_diff > max_diff) {
- max_diff = abs_diff;
- }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_uv_c[i] - dst_uv_opt[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
}
}
@@ -121,28 +94,26 @@ static int UVTestFilter(int src_width,
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
-#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
+#define TEST_FACTOR1(name, filter, nom, denom) \
TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
int diff = UVTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
- EXPECT_LE(diff, max_diff); \
+ EXPECT_EQ(0, diff); \
}
#if defined(ENABLE_FULL_TESTS)
-// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
-// filtering is different fixed point implementations for SSSE3, Neon and C.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, None, nom, denom, 0) \
- TEST_FACTOR1(name, Linear, nom, denom, 3) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
- TEST_FACTOR1(name, Box, nom, denom, 3)
+// Test a scale factor with all 4 filters. Expect exact for SIMD vs C.
+#define TEST_FACTOR(name, nom, denom) \
+ TEST_FACTOR1(name, None, nom, denom) \
+ TEST_FACTOR1(name, Linear, nom, denom) \
+ TEST_FACTOR1(name, Bilinear, nom, denom) \
+ TEST_FACTOR1(name, Box, nom, denom)
#else
// Test a scale factor with Bilinear.
-#define TEST_FACTOR(name, nom, denom) \
- TEST_FACTOR1(name, Bilinear, nom, denom, 3)
+#define TEST_FACTOR(name, nom, denom) TEST_FACTOR1(name, Bilinear, nom, denom)
#endif
TEST_FACTOR(2, 1, 2)
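[Editor's note: a worked example of the dimension macros above. For TEST_FACTOR(2, 1, 2) with benchmark_width_ = 1280, SX gives (1280 / 1) * 2 = 2560 for the source and DX gives (1280 / 1) * 1 = 1280 for the destination, so the scale is exactly 2:1 and the stricter EXPECT_EQ(0, diff) is attainable for every filter.]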
diff --git a/files/unit_test/testdata/riscv64.txt b/files/unit_test/testdata/riscv64.txt
new file mode 100644
index 00000000..fbb4200f
--- /dev/null
+++ b/files/unit_test/testdata/riscv64.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imac
+mmu : sv48
\ No newline at end of file
diff --git a/files/unit_test/testdata/riscv64_rvv.txt b/files/unit_test/testdata/riscv64_rvv.txt
new file mode 100644
index 00000000..af1b3f36
--- /dev/null
+++ b/files/unit_test/testdata/riscv64_rvv.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv
+mmu : sv48
\ No newline at end of file
diff --git a/files/unit_test/testdata/riscv64_rvv_zvfh.txt b/files/unit_test/testdata/riscv64_rvv_zvfh.txt
new file mode 100644
index 00000000..c416c1af
--- /dev/null
+++ b/files/unit_test/testdata/riscv64_rvv_zvfh.txt
@@ -0,0 +1,4 @@
+processor : 0
+hart : 1
+isa : rv64imafdcv_zfh_zvfh
+mmu : sv48
\ No newline at end of file
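[Editor's note: these fixtures exercise the /proc/cpuinfo "isa" line parsing used for RISC-V feature detection. A hedged sketch of the kind of check involved; the real RiscvCpuCaps in files/source/cpu_id.cc may differ in detail. The single-letter extensions follow the "rv64"/"rv32" prefix, and a 'v' among them indicates RVV:

    #include <string.h>
    // Illustrative only: detect 'v' in the base extension letters of an
    // isa string such as "rv64imafdcv_zfh_zvfh".
    static int IsaHasVector(const char* isa) {
      if (strncmp(isa, "rv64", 4) != 0 && strncmp(isa, "rv32", 4) != 0) {
        return 0;
      }
      for (const char* p = isa + 4; *p != '\0' && *p != '_'; ++p) {
        if (*p == 'v') {
          return 1;
        }
      }
      return 0;
    }
]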
diff --git a/files/unit_test/unit_test.cc b/files/unit_test/unit_test.cc
index 61145a46..b66ebfab 100644
--- a/files/unit_test/unit_test.cc
+++ b/files/unit_test/unit_test.cc
@@ -88,6 +88,11 @@ int TestCpuEnv(int cpu_info) {
cpu_info &= ~libyuv::kCpuHasLASX;
}
#endif
+#if defined(__riscv) && defined(__linux__)
+ if (TestEnv("LIBYUV_DISABLE_RVV")) {
+ cpu_info &= ~libyuv::kCpuHasRVV;
+ }
+#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
defined(_M_IX86))
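[Editor's note: the new guard follows the existing TestEnv pattern, where setting the variable masks the flag off for the test run (e.g. LIBYUV_DISABLE_RVV=1 ./libyuv_unittest). A minimal sketch, assuming TestEnv is a simple getenv presence check:

    #include <stdlib.h>
    static int TestEnv(const char* name) {
      return getenv(name) != NULL;  // assumed: any set value disables the flag
    }
]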
diff --git a/files/unit_test/unit_test.h b/files/unit_test/unit_test.h
index 0a8df4d2..99cc8d19 100644
--- a/files/unit_test/unit_test.h
+++ b/files/unit_test/unit_test.h
@@ -11,10 +11,10 @@
#ifndef UNIT_TEST_UNIT_TEST_H_ // NOLINT
#define UNIT_TEST_UNIT_TEST_H_
+#include <stddef.h> // For NULL
#ifdef _WIN32
#include <windows.h>
#else
-#include <sys/resource.h>
#include <sys/time.h>
#endif
@@ -77,7 +77,18 @@ static inline bool SizeValid(int src_width,
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
- var = 0
+ var = NULL
+
+#define align_buffer_page_end_16(var, size) \
+ uint8_t* var##_mem = \
+ reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
+ uint16_t* var = reinterpret_cast<uint16_t*>( \
+ (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
+ ~63)
+
+#define free_aligned_buffer_page_end_16(var) \
+ free(var##_mem); \
+ var = NULL
#ifdef WIN32
static inline double get_time() {
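[Editor's note: the _16 macros mirror the byte-oriented align_buffer_page_end trick: over-allocate to a 4096-byte multiple (with 63 bytes of alignment slack), then place the usable region 64-byte aligned and flush against the end of the block, so reads past the buffer tend to land on an unmapped page and fault immediately. The arithmetic, annotated:

    // total = ((size * 2) + 4095 + 63) & ~4095   round up to a page multiple
    // var   = (mem + total - size * 2) & ~63     64-byte aligned start, with
    //                                            the buffer end at the end of
    //                                            the allocation
]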
diff --git a/files/util/cpuid.c b/files/util/cpuid.c
index b618bb10..edc6a26e 100644
--- a/files/util/cpuid.c
+++ b/files/util/cpuid.c
@@ -21,8 +21,9 @@ using namespace libyuv;
int main(int argc, const char* argv[]) {
int cpu_flags = TestCpuFlag(-1);
int has_arm = TestCpuFlag(kCpuHasARM);
- int has_mips = TestCpuFlag(kCpuHasMIPS);
+ int has_riscv = TestCpuFlag(kCpuHasRISCV);
int has_x86 = TestCpuFlag(kCpuHasX86);
+ int has_mips = TestCpuFlag(kCpuHasMIPS);
int has_loongarch = TestCpuFlag(kCpuHasLOONGARCH);
(void)argc;
(void)argv;
@@ -62,24 +63,28 @@ int main(int argc, const char* argv[]) {
model, model);
}
#endif
- printf("Cpu Flags %x\n", cpu_flags);
- printf("Has ARM %x\n", has_arm);
- printf("Has MIPS %x\n", has_mips);
- printf("Has X86 %x\n", has_x86);
- printf("Has LOONGARCH %x\n", has_loongarch);
+ printf("Cpu Flags 0x%x\n", cpu_flags);
if (has_arm) {
int has_neon = TestCpuFlag(kCpuHasNEON);
- printf("Has NEON %x\n", has_neon);
+ printf("Has ARM 0x%x\n", has_arm);
+ printf("Has NEON 0x%x\n", has_neon);
+ }
+ if (has_riscv) {
+ int has_rvv = TestCpuFlag(kCpuHasRVV);
+ printf("Has RISCV 0x%x\n", has_riscv);
+ printf("Has RVV 0x%x\n", has_rvv);
}
if (has_mips) {
int has_msa = TestCpuFlag(kCpuHasMSA);
- printf("Has MSA %x\n", has_msa);
+ printf("Has MIPS 0x%x\n", has_mips);
+ printf("Has MSA 0x%x\n", has_msa);
}
if (has_loongarch) {
int has_lsx = TestCpuFlag(kCpuHasLSX);
- printf("Has LSX %x\n", has_lsx);
int has_lasx = TestCpuFlag(kCpuHasLASX);
- printf("Has LASX %x\n", has_lasx);
+ printf("Has LOONGARCH 0x%x\n", has_loongarch);
+ printf("Has LSX 0x%x\n", has_lsx);
+ printf("Has LASX 0x%x\n", has_lasx);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
@@ -99,23 +104,24 @@ int main(int argc, const char* argv[]) {
int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2);
int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG);
int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ);
- printf("Has SSE2 %x\n", has_sse2);
- printf("Has SSSE3 %x\n", has_ssse3);
- printf("Has SSE4.1 %x\n", has_sse41);
- printf("Has SSE4.2 %x\n", has_sse42);
- printf("Has AVX %x\n", has_avx);
- printf("Has AVX2 %x\n", has_avx2);
- printf("Has ERMS %x\n", has_erms);
- printf("Has FMA3 %x\n", has_fma3);
- printf("Has F16C %x\n", has_f16c);
- printf("Has GFNI %x\n", has_gfni);
- printf("Has AVX512BW %x\n", has_avx512bw);
- printf("Has AVX512VL %x\n", has_avx512vl);
- printf("Has AVX512VNNI %x\n", has_avx512vnni);
- printf("Has AVX512VBMI %x\n", has_avx512vbmi);
- printf("Has AVX512VBMI2 %x\n", has_avx512vbmi2);
- printf("Has AVX512VBITALG %x\n", has_avx512vbitalg);
- printf("Has AVX512VPOPCNTDQ %x\n", has_avx512vpopcntdq);
+ printf("Has X86 0x%x\n", has_x86);
+ printf("Has SSE2 0x%x\n", has_sse2);
+ printf("Has SSSE3 0x%x\n", has_ssse3);
+ printf("Has SSE4.1 0x%x\n", has_sse41);
+ printf("Has SSE4.2 0x%x\n", has_sse42);
+ printf("Has AVX 0x%x\n", has_avx);
+ printf("Has AVX2 0x%x\n", has_avx2);
+ printf("Has ERMS 0x%x\n", has_erms);
+ printf("Has FMA3 0x%x\n", has_fma3);
+ printf("Has F16C 0x%x\n", has_f16c);
+ printf("Has GFNI 0x%x\n", has_gfni);
+ printf("Has AVX512BW 0x%x\n", has_avx512bw);
+ printf("Has AVX512VL 0x%x\n", has_avx512vl);
+ printf("Has AVX512VNNI 0x%x\n", has_avx512vnni);
+ printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi);
+ printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2);
+ printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg);
+ printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq);
}
return 0;
}
diff --git a/files/util/yuvconstants.c b/files/util/yuvconstants.c
index 037e0824..4e5185af 100644
--- a/files/util/yuvconstants.c
+++ b/files/util/yuvconstants.c
@@ -43,9 +43,10 @@
// #define BR (-VR * 128 + YB)
int main(int argc, const char* argv[]) {
- if (argc < 2) {
- printf("yuvconstants Kr Kb\n");
- printf(" MC BT KR = 0.2126; KB = 0.0722\n");
+ if (argc < 3) {
+ printf("yuvconstants [KR] [KB]\n");
+ printf(" e.g. yuvconstants 0.2126 0.0722\n");
+ printf(" MC BT KR KB\n");
printf(" 1 BT.709 KR = 0.2126; KB = 0.0722\n");
printf(" 4 FCC KR = 0.30; KB = 0.11\n");
printf(" 6 BT.601 KR = 0.299; KB = 0.114\n");
@@ -53,8 +54,8 @@ int main(int argc, const char* argv[]) {
printf(" 9 BT.2020 KR = 0.2627; KB = 0.0593\n");
return -1;
}
- float kr = atof(argv[1]);
- float kb = atof(argv[2]);
+ float kr = (float)atof(argv[1]);
+ float kb = (float)atof(argv[2]);
float kg = 1 - kr - kb;
float vr = 2 * (1 - kr);
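[Editor's note: these lines follow the standard derivation from the luma coefficients: kg = 1 - kr - kb, and the V (red-difference) scale is 2 * (1 - kr). Worked through for BT.709, with the U scale assumed by symmetry since the remaining lines of the file are not shown here:

    // kr = 0.2126, kb = 0.0722
    // kg = 1 - kr - kb  = 0.7152
    // vr = 2 * (1 - kr) = 1.5748   (R = Y + vr * V)
    // ub = 2 * (1 - kb) = 1.8556   (B = Y + ub * U)  -- assumed, not shown
]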
diff --git a/files/util/yuvconvert.cc b/files/util/yuvconvert.cc
index 332699e3..93b52668 100644
--- a/files/util/yuvconvert.cc
+++ b/files/util/yuvconvert.cc
@@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) {
}
// Parse PYUV format, e.g. name.1920x800_24Hz_P420.yuv
-bool ExtractResolutionFromFilename(const char* name,
- int* width_ptr,
- int* height_ptr) {
+static bool ExtractResolutionFromFilename(const char* name,
+ int* width_ptr,
+ int* height_ptr) {
// Isolate the .width_height. section of the filename by searching for a
// dot or underscore followed by a digit.
for (int i = 0; name[i]; ++i) {
@@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name,
return false;
}
-void PrintHelp(const char* program) {
+static void PrintHelp(const char* program) {
printf("%s [-options] src_argb.raw dst_yuv.raw\n", program);
printf(
" -s <width> <height> .... specify source resolution. "
@@ -78,7 +78,7 @@ void PrintHelp(const char* program) {
exit(0);
}
-void ParseOptions(int argc, const char* argv[]) {
+static void ParseOptions(int argc, const char* argv[]) {
if (argc <= 1) {
PrintHelp(argv[0]);
}