aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjinbo <jinbo@loongson.cn>2024-01-08 10:17:17 +0800
committerCosmin Truta <ctruta@gmail.com>2024-01-17 22:42:02 +0200
commit6b0d1bd75b6c51bb5da12f35327508cc31b92da4 (patch)
tree9e3eae7eec1861b80aa2e750006d5bfdf54ac6b1
parent2a4f0f5aee8b78d8b7afeb91bf9b6ad7f6e0131e (diff)
downloadlibpng-6b0d1bd75b6c51bb5da12f35327508cc31b92da4.tar.gz
Add loongarch support and LSX SIMD optimizations
Enable LSX by default: ./configure && make Disable LSX: ./configure --enable-loongarch-lsx=no && make Signed-off-by: Cosmin Truta <ctruta@gmail.com>
-rw-r--r--Makefile.am13
-rw-r--r--configure.ac71
-rw-r--r--loongarch/filter_lsx_intrinsics.c412
-rw-r--r--loongarch/loongarch_lsx_init.c65
-rw-r--r--pngpriv.h28
5 files changed, 589 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index ba51d91b4..370bdbf78 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -127,6 +127,15 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\
powerpc/filter_vsx_intrinsics.c
endif
+if PNG_LOONGARCH_LSX
+noinst_LTLIBRARIES= libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\
+ loongarch/filter_lsx_intrinsics.c
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_CFLAGS = -mlsx
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LIBADD = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
+# libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
+endif
+
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \
@@ -147,6 +156,10 @@ else
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng.sym
endif
+if PNG_LOONGARCH_LSX
+ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES += libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
+endif
+
#distribute headers in /usr/include/libpng/*
pkgincludedir= $(includedir)/$(PNGLIB_BASENAME)
pkginclude_HEADERS= png.h pngconf.h
diff --git a/configure.ac b/configure.ac
index 051d7933f..938c106a7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -334,6 +334,9 @@ AC_ARG_ENABLE([hardware-optimizations],
enable_intel_sse=no
AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
[Disable INTEL_SSE optimizations])
+ enable_loongarch_lsx=no
+ AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0],
+ [Disable LOONGARCH_LSX optimizations])
;;
*)
# allow enabling hardware optimization on any system:
@@ -358,6 +361,11 @@ AC_ARG_ENABLE([hardware-optimizations],
AC_DEFINE([PNG_POWERPC_VSX_OPT], [2],
[Enable POWERPC VSX optimizations])
;;
+ loongarch*)
+ enable_loongarch_lsx=yes
+ AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1],
+ [Enable LOONGARCH_LSX optimizations])
+ ;;
esac
;;
esac])
@@ -535,6 +543,69 @@ AM_CONDITIONAL([PNG_POWERPC_VSX],
powerpc*|ppc64*) : ;;
esac])
+# LOONGARCH
+# ===
+#
+# LOONGARCH LSX (SIMD) support
+
+if test "$LSX_CFLAGS" = ''; then
+ LSX_CFLAGS="-mlsx"
+fi
+
+compiler_support_loongarch_lsx=no
+AC_MSG_CHECKING(whether to use loongarch LSX intrinsics)
+save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $LSX_CFLAGS"
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include<lsxintrin.h>
+int main(){
+ __m128i a, b, c;
+ a = __lsx_vadd_w(b, c);
+ return 0;
+}]])],compiler_support_loongarch_lsx=yes)
+CFLAGS=$save_CFLAGS
+AC_MSG_RESULT($compiler_support_loongarch_lsx)
+
+AC_ARG_ENABLE([loongarch-lsx],
+ AS_HELP_STRING([[[--enable-loongarch-lsx]]],
+ [Enable LOONGARCH LSX optimizations: =no/off, yes/on:]
+ [no/off: disable the optimizations;]
+ [yes/on: turn on unconditionally.]
+ [If not specified: determined by the compiler.]),
+ [case "$enableval" in
+ no|off)
+ # disable the default enabling on __loongarch_simd systems:
+ AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0],
+ [Disable LOONGARCH LSX optimizations])
+ # Prevent inclusion of the LSX source files below:
+ enable_loongarch_lsx=no;;
+ yes|on)
+ AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1],
+ [Enable LOONGARCH LSX optimizations])
+ ;;
+ *)
+ AC_MSG_ERROR([--enable-loongarch-lsx=${enable_loongarch_lsx}: invalid value])
+ esac])
+
+if test "$enable_loongarch_lsx" != 'no'; then
+ if test $compiler_support_loongarch_lsx = yes; then
+ AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], [Enable LOONGARCH LSX optimizations])
+ else
+ AC_MSG_WARN([Compiler does not support loongarch LSX.])
+ fi
+fi
+
+# Add LOONGARCH specific files to all builds where the host_cpu is loongarch ('loongarch*') or
+# where LOONGARCH optimizations were explicitly requested (this allows a fallback if a
+# future host CPU does not match 'loongarch*')
+
+AM_CONDITIONAL([PNG_LOONGARCH_LSX],
+ [test "$enable_loongarch_lsx" != 'no' && test $compiler_support_loongarch_lsx = yes &&
+ case "$host_cpu" in
+ loongarch*) :;;
+ *) test "$enable_loongarch_lsx" != '';;
+ esac])
+
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above
diff --git a/loongarch/filter_lsx_intrinsics.c b/loongarch/filter_lsx_intrinsics.c
new file mode 100644
index 000000000..af6cc763a
--- /dev/null
+++ b/loongarch/filter_lsx_intrinsics.c
@@ -0,0 +1,412 @@
+/* filter_lsx_intrinsics.c - LSX optimized filter functions
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ * All rights reserved.
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 2016 Glenn Randers-Pehrson
+ * Contributed by Jin Bo (jinbo@loongson.cn)
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <lsxintrin.h>
+
+/* Helper macros wrapping the LSX intrinsics used by the filter functions in
+ * this file.
+ *
+ * The multi-statement macros expand to a bare { ... } block and substitute
+ * their arguments without extra parentheses (e.g. "psrc + stride * 2"), so
+ * callers should pass simple identifiers or constants.
+ */
+
+/* Wraps __lsx_vld with a zero offset from psrc. */
+#define LSX_LD(psrc) __lsx_vld((psrc), 0)
+
+/* Loads two vectors: from psrc and from psrc + stride. */
+#define LSX_LD_2(psrc, stride, out0, out1) \
+{ \
+ out0 = LSX_LD(psrc); \
+ out1 = LSX_LD(psrc + stride); \
+}
+
+/* Loads four vectors from psrc at offsets 0, 1, 2 and 3 times stride. */
+#define LSX_LD_4(psrc, stride, out0, out1, out2, out3) \
+{ \
+ LSX_LD_2(psrc, stride, out0, out1); \
+ LSX_LD_2(psrc + stride * 2, stride, out2, out3); \
+}
+
+/* Wraps __lsx_vst with a zero offset from pdst. */
+#define LSX_ST(in, pdst) __lsx_vst(in, (pdst), 0)
+
+/* Stores two vectors: to pdst and to pdst + stride. */
+#define LSX_ST_2(in0, in1, pdst, stride) \
+{ \
+ LSX_ST(in0, pdst); \
+ LSX_ST(in1, pdst + stride); \
+}
+
+/* Stores four vectors to pdst at offsets 0, 1, 2 and 3 times stride. */
+#define LSX_ST_4(in0, in1, in2, in3, pdst, stride) \
+{ \
+ LSX_ST_2(in0, in1, pdst, stride); \
+ LSX_ST_2(in2, in3, pdst + stride * 2, stride); \
+}
+
+/* out0 = __lsx_vadd_b(in0, in1). */
+#define LSX_ADD_B(in0, in1, out0) \
+{ \
+ out0 = __lsx_vadd_b(in0, in1); \
+}
+
+/* Two LSX_ADD_B operations: (in0, in1) -> out0 and (in2, in3) -> out1. */
+#define LSX_ADD_B_2(in0, in1, in2, in3, out0, out1) \
+{ \
+ LSX_ADD_B(in0, in1, out0); \
+ LSX_ADD_B(in2, in3, out1); \
+}
+
+/* Four LSX_ADD_B operations on consecutive argument pairs. */
+#define LSX_ADD_B_4(in0, in1, in2, in3, in4, in5, \
+ in6, in7, out0, out1, out2, out3) \
+{ \
+ LSX_ADD_B_2(in0, in1, in2, in3, out0, out1); \
+ LSX_ADD_B_2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Applies __lsx_vadda_h(x, zero) to each of the three inputs.
+ *
+ * NOTE: this macro is not self-contained; it requires a vector variable named
+ * 'zero' to be in scope at the point of expansion.  Despite the "_B_" in its
+ * name it uses the "_h" form of the intrinsic.
+ * NOTE(review): presumably this yields absolute values when 'zero' is an
+ * all-zero vector - confirm against the LSX intrinsics reference.
+ */
+#define LSX_ABS_B_3(in0, in1, in2, out0, out1, out2) \
+{ \
+ out0 = __lsx_vadda_h(in0, zero); \
+ out1 = __lsx_vadda_h(in1, zero); \
+ out2 = __lsx_vadda_h(in2, zero); \
+}
+
+/* out0 = __lsx_vilvl_b(in_h, in_l). */
+#define LSX_ILVL_B(in_h, in_l, out0) \
+{ \
+ out0 = __lsx_vilvl_b(in_h, in_l); \
+}
+
+/* Two LSX_ILVL_B operations: (in0_h, in0_l) -> out0, (in1_h, in1_l) -> out1. */
+#define LSX_ILVL_B_2(in0_h, in0_l, in1_h, in1_l, out0, out1) \
+{ \
+ LSX_ILVL_B(in0_h, in0_l, out0); \
+ LSX_ILVL_B(in1_h, in1_l, out1); \
+}
+
+/* Calls __lsx_vhsubw_hu_bu with each input passed as both operands. */
+#define LSX_HSUB_HU_BU_2(in0, in1, out0, out1) \
+{ \
+ out0 = __lsx_vhsubw_hu_bu(in0, in0); \
+ out1 = __lsx_vhsubw_hu_bu(in1, in1); \
+}
+
+/* Selects one of in3/in4/in5 and adds it (byte-wise) to out0, which is both
+ * an input and the output.
+ *
+ * The selection masks come from __lsx_vslt_h comparisons: first in1 against
+ * in0, then in2 against __lsx_vmin_bu(in0, in1).  Each mask is narrowed with
+ * __lsx_vpickev_b before being handed to __lsx_vbitsel_v.
+ * NOTE(review): presumably in0..in2 are the distances belonging to the
+ * candidates in3..in5 and the candidate with the smallest distance wins -
+ * confirm against the callers and the LSX intrinsics reference.
+ */
+#define LSX_CMP_PICK_SMALLER(in0, in1, in2, in3, in4, in5, out0) \
+{ \
+ __m128i _cmph, _cmpb, _in0, _in3; \
+ _cmph = __lsx_vslt_h(in1, in0); \
+ _cmpb = __lsx_vpickev_b(_cmph, _cmph); \
+ _in0 = __lsx_vmin_bu(in0,in1); \
+ _in3 = __lsx_vbitsel_v(in3, in4, _cmpb); \
+ _cmph = __lsx_vslt_h(in2, _in0); \
+ _cmpb = __lsx_vpickev_b(_cmph, _cmph); \
+ _in3 = __lsx_vbitsel_v(_in3, in5, _cmpb); \
+ out0 = __lsx_vadd_b(out0, _in3); \
+}
+
+/* Reverse the "Up" filter: add each byte of prev_row to the corresponding
+ * byte of row, in place, for row_info->rowbytes bytes.
+ *
+ * The bulk of the row is handled 64 bytes at a time; the remainder is
+ * consumed in progressively smaller steps (32, 16, 8, then single bytes).
+ */
+void png_read_filter_row_up_lsx(png_row_infop row_info, png_bytep row,
+                                png_const_bytep prev_row)
+{
+   size_t remaining = row_info->rowbytes;
+   png_bytep cur = row;
+   png_const_bytep above = prev_row;
+   __m128i r0, r1, r2, r3;
+   __m128i p0, p1, p2, p3;
+
+   /* Four 16-byte vectors from each row per iteration. */
+   for (; remaining >= 64; remaining -= 64)
+   {
+      LSX_LD_4(cur, 16, r0, r1, r2, r3);
+      LSX_LD_4(above, 16, p0, p1, p2, p3);
+      LSX_ADD_B_4(r0, p0, r1, p1, r2, p2, r3, p3, r0, r1, r2, r3);
+      LSX_ST_4(r0, r1, r2, r3, cur, 16);
+      cur += 64;
+      above += 64;
+   }
+
+   /* Fewer than 64 bytes are left from here on. */
+   if (remaining >= 32)
+   {
+      LSX_LD_2(cur, 16, r0, r1);
+      LSX_LD_2(above, 16, p0, p1);
+      LSX_ADD_B_2(r0, p0, r1, p1, r0, r1);
+      LSX_ST_2(r0, r1, cur, 16);
+      cur += 32;
+      above += 32;
+      remaining -= 32;
+   }
+
+   if (remaining >= 16)
+   {
+      r0 = LSX_LD(cur);
+      p0 = LSX_LD(above);
+      LSX_ADD_B(r0, p0, r0);
+      LSX_ST(r0, cur);
+      cur += 16;
+      above += 16;
+      remaining -= 16;
+   }
+
+   if (remaining >= 8)
+   {
+      r0 = __lsx_vldrepl_d(cur, 0);
+      p0 = __lsx_vldrepl_d(above, 0);
+      r0 = __lsx_vadd_b(r0, p0);
+      __lsx_vstelm_d(r0, cur, 0, 0);
+      cur += 8;
+      above += 8;
+      remaining -= 8;
+   }
+
+   /* Scalar tail for the last few bytes. */
+   for (; remaining > 0; remaining--)
+   {
+      *cur = *cur + *above;
+      cur++;
+      above++;
+   }
+}
+
+/* Reverse the "Sub" filter for rows with 3 bytes per pixel.
+ *
+ * Each pixel after the first has the already-reconstructed pixel to its left
+ * added to it, in place.  The first pixel is left untouched.  prev_row is
+ * not used by this filter.
+ */
+void png_read_filter_row_sub3_lsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   size_t n = row_info->rowbytes;
+   png_bytep nxt = row;
+   __m128i vec_0, vec_1;
+
+   PNG_UNUSED(prev_row);
+
+   /* The first pixel seeds the running "left neighbour" value. */
+   vec_0 = __lsx_vldrepl_w(nxt, 0);
+   nxt += 3;
+   n -= 3;
+
+   while (n >= 3)
+   {
+      /* NOTE(review): __lsx_vldrepl_w presumably reads 4 bytes although a
+       * pixel is only 3 bytes wide, so the load for the final pixel would
+       * touch one byte past rowbytes - confirm the row buffer is padded.
+       */
+      vec_1 = __lsx_vldrepl_w(nxt, 0);
+      vec_1 = __lsx_vadd_b(vec_1, vec_0);
+
+      /* Write back exactly 3 bytes: one halfword, then byte index 2. */
+      __lsx_vstelm_h(vec_1, nxt, 0, 0);
+      vec_0 = vec_1;
+      nxt += 2;
+      __lsx_vstelm_b(vec_1, nxt, 0, 2);
+      nxt += 1;
+      n -= 3;
+   }
+
+   /* Scalar tail: 'row' trails 'nxt' by one pixel (3 bytes). */
+   row = nxt - 3;
+   while (n--)
+   {
+      *nxt = *nxt + *row++;
+      nxt++;
+   }
+}
+
+/* Reverse the "Sub" filter for rows with 4 bytes per pixel.
+ *
+ * Every pixel after the first has the reconstructed pixel to its left added
+ * to it, in place; prev_row is not used.
+ */
+void png_read_filter_row_sub4_lsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   size_t remaining = row_info->rowbytes;
+   __m128i left, cur;
+
+   PNG_UNUSED(prev_row);
+
+   /* The first pixel is unchanged and becomes the initial left neighbour. */
+   left = __lsx_vldrepl_w(row, 0);
+   row += 4;
+   remaining -= 4;
+
+   for (; remaining >= 4; remaining -= 4, row += 4)
+   {
+      cur = __lsx_vldrepl_w(row, 0);
+      cur = __lsx_vadd_b(cur, left);
+      __lsx_vstelm_w(cur, row, 0, 0);
+      left = cur;
+   }
+}
+
+/* Reverse the "Average" filter for rows with 3 bytes per pixel, in place.
+ *
+ * row_info->rowbytes gives the number of bytes to process, row is the
+ * filtered row that is overwritten, and prev_row is the row above it.
+ * NOTE(review): __lsx_vavg_bu is presumably the truncating unsigned byte
+ * average the PNG filter requires - confirm against the LSX reference.
+ */
+void png_read_filter_row_avg3_lsx(png_row_infop row_info, png_bytep row,
+ png_const_bytep prev_row)
+{
+ size_t n = row_info->rowbytes;
+ png_bytep nxt = row;
+ png_const_bytep prev_nxt = prev_row;
+ __m128i vec_0, vec_1, vec_2;
+
+ /* First pixel: there is no pixel to the left, so only the prev_row pixel
+ * shifted right by one bit is added.
+ */
+ vec_0 = __lsx_vldrepl_w(nxt, 0);
+ vec_1 = __lsx_vldrepl_w(prev_nxt, 0);
+ prev_nxt += 3;
+ vec_1 = __lsx_vsrli_b(vec_1, 1);
+ vec_1 = __lsx_vadd_b(vec_1, vec_0);
+ /* Write back 3 bytes: one halfword, then byte index 2. */
+ __lsx_vstelm_h(vec_1, nxt, 0, 0);
+ nxt += 2;
+ __lsx_vstelm_b(vec_1, nxt, 0, 2);
+ nxt += 1;
+ n -= 3;
+
+ while (n >= 3)
+ {
+ /* vec_2 keeps the pixel reconstructed in the previous step. */
+ vec_2 = vec_1;
+ vec_0 = __lsx_vldrepl_w(nxt, 0);
+ vec_1 = __lsx_vldrepl_w(prev_nxt, 0);
+ prev_nxt += 3;
+
+ /* Combine the prev_row pixel with the previous result, then add the
+ * current filtered pixel.
+ */
+ vec_1 = __lsx_vavg_bu(vec_1, vec_2);
+ vec_1 = __lsx_vadd_b(vec_1, vec_0);
+
+ __lsx_vstelm_h(vec_1, nxt, 0, 0);
+ nxt += 2;
+ __lsx_vstelm_b(vec_1, nxt, 0, 2);
+ nxt += 1;
+ n -= 3;
+ }
+
+ /* Tail: process any leftover bytes one at a time; 'row' trails 'nxt' by
+ * one pixel (3 bytes).
+ */
+ row = nxt - 3;
+ while (n--)
+ {
+ vec_2 = __lsx_vldrepl_b(row, 0);
+ row++;
+ vec_0 = __lsx_vldrepl_b(nxt, 0);
+ vec_1 = __lsx_vldrepl_b(prev_nxt, 0);
+ prev_nxt++;
+
+ vec_1 = __lsx_vavg_bu(vec_1, vec_2);
+ vec_1 = __lsx_vadd_b(vec_1, vec_0);
+
+ __lsx_vstelm_b(vec_1, nxt, 0, 0);
+ nxt++;
+ }
+}
+
+/* Reverse the "Average" filter for rows with 4 bytes per pixel, in place.
+ *
+ * The first pixel only has the prev_row pixel (shifted right by one bit)
+ * added to it.  Every later pixel has __lsx_vavg_bu of the prev_row pixel
+ * and the previously reconstructed pixel added to it.
+ */
+void png_read_filter_row_avg4_lsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   size_t remaining = row_info->rowbytes;
+   __m128i raw, above, left;
+
+   /* First pixel: no left neighbour is available. */
+   raw = __lsx_vldrepl_w(row, 0);
+   above = __lsx_vldrepl_w(prev_row, 0);
+   above = __lsx_vsrli_b(above, 1);
+   left = __lsx_vadd_b(above, raw);
+   __lsx_vstelm_w(left, row, 0, 0);
+   row += 4;
+   prev_row += 4;
+   remaining -= 4;
+
+   for (; remaining >= 4; remaining -= 4, row += 4, prev_row += 4)
+   {
+      raw = __lsx_vldrepl_w(row, 0);
+      above = __lsx_vldrepl_w(prev_row, 0);
+
+      /* 'left' still holds the pixel produced by the previous step. */
+      above = __lsx_vavg_bu(above, left);
+      left = __lsx_vadd_b(above, raw);
+
+      __lsx_vstelm_w(left, row, 0, 0);
+   }
+}
+
+/* Reverse the "Paeth" filter for rows with 3 bytes per pixel, in place.
+ *
+ * Vector roles inside the loops:
+ *   vec_a - the pixel reconstructed in the previous step (left neighbour)
+ *   vec_b - the prev_row pixel at the current position (above)
+ *   vec_c - the prev_row pixel from the previous step (upper left)
+ *   vec_d - the current filtered pixel, replaced by the result
+ * NOTE(review): the macro sequence presumably derives the three Paeth
+ * distances from a, b and c and adds the closest predictor to d - confirm
+ * against the LSX intrinsics reference.
+ */
+void png_read_filter_row_paeth3_lsx(png_row_infop row_info,
+ png_bytep row,
+ png_const_bytep prev_row)
+{
+ size_t n = row_info->rowbytes;
+ png_bytep nxt = row;
+ png_const_bytep prev_nxt = prev_row;
+ __m128i vec_a, vec_b, vec_c, vec_d;
+ __m128i vec_pa, vec_pb, vec_pc;
+ /* 'zero' is referenced by name inside the LSX_ABS_B_3 macro expansion. */
+ __m128i zero = {0};
+
+ /* First pixel: the prev_row pixel is simply added to the current one. */
+ vec_a = __lsx_vldrepl_w(nxt, 0);
+ vec_b = __lsx_vldrepl_w(prev_nxt, 0);
+ prev_nxt += 3;
+ vec_d = __lsx_vadd_b(vec_a, vec_b);
+ /* Write back 3 bytes: one halfword, then byte index 2. */
+ __lsx_vstelm_h(vec_d, nxt, 0, 0);
+ nxt += 2;
+ __lsx_vstelm_b(vec_d, nxt, 0, 2);
+ nxt += 1;
+ n -= 3;
+
+ while (n >= 3)
+ {
+ /* Shift the window: the last result becomes 'a', the last 'b' becomes
+ * 'c', then fresh 'b' and 'd' values are loaded.
+ */
+ vec_a = vec_d;
+ vec_c = vec_b;
+ vec_b = __lsx_vldrepl_w(prev_nxt, 0);
+ prev_nxt += 3;
+ vec_d = __lsx_vldrepl_w(nxt, 0);
+
+ /* vec_pa is built from (b, c), vec_pb from (a, c), vec_pc is their sum;
+ * LSX_CMP_PICK_SMALLER then updates vec_d from a, b or c.
+ */
+ LSX_ILVL_B_2(vec_b, vec_c, vec_a, vec_c, vec_pa, vec_pb);
+ LSX_HSUB_HU_BU_2(vec_pa, vec_pb, vec_pa, vec_pb);
+ vec_pc = __lsx_vadd_h(vec_pa, vec_pb);
+ LSX_ABS_B_3(vec_pa, vec_pb, vec_pc, vec_pa, vec_pb, vec_pc);
+ LSX_CMP_PICK_SMALLER(vec_pa, vec_pb, vec_pc, vec_a, vec_b, vec_c, vec_d);
+
+ __lsx_vstelm_h(vec_d, nxt, 0, 0);
+ nxt += 2;
+ __lsx_vstelm_b(vec_d, nxt, 0, 2);
+ nxt += 1;
+ n -= 3;
+ }
+
+ /* Tail: process leftover bytes one at a time.  'row' and 'prev_row' trail
+ * 'nxt' and 'prev_nxt' by one pixel (3 bytes).
+ */
+ prev_row = prev_nxt - 3;
+ row = nxt - 3;
+ while (n--)
+ {
+ vec_a = __lsx_vldrepl_b(row, 0);
+ row++;
+ vec_b = __lsx_vldrepl_b(prev_nxt, 0);
+ prev_nxt++;
+ vec_c = __lsx_vldrepl_b(prev_row, 0);
+ prev_row++;
+ vec_d = __lsx_vldrepl_b(nxt, 0);
+
+ LSX_ILVL_B_2(vec_b, vec_c, vec_a, vec_c, vec_pa, vec_pb);
+ LSX_HSUB_HU_BU_2(vec_pa, vec_pb, vec_pa, vec_pb);
+ vec_pc = __lsx_vadd_h(vec_pa, vec_pb);
+ LSX_ABS_B_3(vec_pa, vec_pb, vec_pc, vec_pa, vec_pb, vec_pc);
+ LSX_CMP_PICK_SMALLER(vec_pa, vec_pb, vec_pc, vec_a, vec_b, vec_c, vec_d);
+
+ __lsx_vstelm_b(vec_d, nxt, 0, 0);
+ nxt++;
+ }
+}
+
+/* Reverse the "Paeth" filter for rows with 4 bytes per pixel, in place.
+ *
+ * Vector roles inside the loop:
+ *   vec_a - the pixel reconstructed in the previous step (left neighbour)
+ *   vec_b - the prev_row pixel at the current position (above)
+ *   vec_c - the prev_row pixel from the previous step (upper left)
+ *   vec_d - the current filtered pixel, replaced by the result
+ * NOTE(review): the macro sequence presumably derives the three Paeth
+ * distances from a, b and c and adds the closest predictor to d - confirm
+ * against the LSX intrinsics reference.
+ */
+void png_read_filter_row_paeth4_lsx(png_row_infop row_info,
+ png_bytep row,
+ png_const_bytep prev_row)
+{
+ size_t n = row_info->rowbytes;
+ __m128i vec_a, vec_b, vec_c, vec_d;
+ __m128i vec_pa, vec_pb, vec_pc;
+ /* 'zero' is referenced by name inside the LSX_ABS_B_3 macro expansion. */
+ __m128i zero = {0};
+
+ /* First pixel: the prev_row pixel is simply added to the current one. */
+ vec_a = __lsx_vldrepl_w(row, 0);
+ vec_b = __lsx_vldrepl_w(prev_row, 0);
+ prev_row += 4;
+ vec_d = __lsx_vadd_b(vec_a, vec_b);
+ __lsx_vstelm_w(vec_d, row, 0, 0);
+ row += 4;
+ n -= 4;
+
+ while (n >= 4)
+ {
+ /* Shift the window: the last result becomes 'a', the last 'b' becomes
+ * 'c', then fresh 'b' and 'd' values are loaded.
+ */
+ vec_a = vec_d;
+ vec_c = vec_b;
+ vec_b = __lsx_vldrepl_w(prev_row, 0);
+ prev_row += 4;
+ vec_d = __lsx_vldrepl_w(row, 0);
+
+ /* vec_pa is built from (b, c), vec_pb from (a, c), vec_pc is their sum;
+ * LSX_CMP_PICK_SMALLER then updates vec_d from a, b or c.
+ */
+ LSX_ILVL_B_2(vec_b, vec_c, vec_a, vec_c, vec_pa, vec_pb);
+ LSX_HSUB_HU_BU_2(vec_pa, vec_pb, vec_pa, vec_pb);
+ vec_pc = __lsx_vadd_h(vec_pa, vec_pb);
+ LSX_ABS_B_3(vec_pa, vec_pb, vec_pc, vec_pa, vec_pb, vec_pc);
+ LSX_CMP_PICK_SMALLER(vec_pa, vec_pb, vec_pc, vec_a, vec_b, vec_c, vec_d);
+
+ __lsx_vstelm_w(vec_d, row, 0, 0);
+ row += 4;
+ n -= 4;
+ }
+}
+
+#endif /* PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/loongarch/loongarch_lsx_init.c b/loongarch/loongarch_lsx_init.c
new file mode 100644
index 000000000..2c80fe81b
--- /dev/null
+++ b/loongarch/loongarch_lsx_init.c
@@ -0,0 +1,65 @@
+/* loongarch_lsx_init.c - LSX optimized filter function initialization
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ * All rights reserved.
+ * Contributed by Jin Bo <jinbo@loongson.cn>
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1
+
+#include <sys/auxv.h>
+
+#define LA_HWCAP_LSX (1<<4)
+
+/* Report whether the running CPU advertises LSX support.
+ *
+ * Reads the AT_HWCAP entry through getauxval() and tests the LA_HWCAP_LSX
+ * bit.  Returns 1 when the bit is set and 0 otherwise.
+ */
+static int png_has_lsx(void)
+{
+   /* Keep the value in getauxval()'s own return type instead of narrowing
+    * it to int before the bit test.
+    */
+   unsigned long hwcap = getauxval(AT_HWCAP);
+
+   return (hwcap & LA_HWCAP_LSX) != 0;
+}
+
+/* Install the LSX read-filter implementations into pp->read_filter.
+ *
+ * Nothing is changed unless png_has_lsx() reports LSX support.  The "Up"
+ * filter is replaced for every bpp; the "Sub", "Avg" and "Paeth" filters are
+ * replaced only when bpp is 3 or 4.
+ */
+void
+png_init_filter_functions_lsx(png_structp pp, unsigned int bpp)
+{
+   /* IMPORTANT: every new external function referenced here has to be
+    * declared with PNG_INTERNAL_FUNCTION in ../pngpriv.h, otherwise the
+    * 'prefix' option to configure stops working:
+    *
+    *    ./configure --with-libpng-prefix=foobar_
+    *
+    * To check, run the command above, build, and inspect pngprefix.h: it
+    * must contain a #define for each external function that was added.
+    * (The initialization function itself is covered automatically.)
+    */
+
+   if (!png_has_lsx())
+      return;
+
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_lsx;
+
+   switch (bpp)
+   {
+      case 3:
+         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
+            png_read_filter_row_sub3_lsx;
+         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
+            png_read_filter_row_avg3_lsx;
+         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+            png_read_filter_row_paeth3_lsx;
+         break;
+
+      case 4:
+         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
+            png_read_filter_row_sub4_lsx;
+         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
+            png_read_filter_row_avg4_lsx;
+         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+            png_read_filter_row_paeth4_lsx;
+         break;
+
+      default:
+         /* Other pixel sizes keep the existing Sub/Avg/Paeth handlers. */
+         break;
+   }
+}
+
+#endif /* PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/pngpriv.h b/pngpriv.h
index 7c19373f0..cdbc6c342 100644
--- a/pngpriv.h
+++ b/pngpriv.h
@@ -276,6 +276,12 @@
# define PNG_POWERPC_VSX_IMPLEMENTATION 0
#endif
+#if PNG_LOONGARCH_LSX_OPT > 0
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_lsx
+# define PNG_LOONGARCH_LSX_IMPLEMENTATION 1
+#else
+# define PNG_LOONGARCH_LSX_IMPLEMENTATION 0
+#endif
/* Is this a build of a DLL where compilation of the object modules requires
* different preprocessor settings to those required for a simple library? If
@@ -1355,6 +1361,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
+#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
/* Choose the best filter to use and filter the row data */
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
png_row_infop row_info),PNG_EMPTY);
@@ -2105,6 +2128,11 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
# endif
#endif
+#if PNG_LOONGARCH_LSX_OPT > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx,
+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#endif
+
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
png_const_charp key, png_bytep new_key), PNG_EMPTY);