aboutsummaryrefslogtreecommitdiff
path: root/simd/i386/jsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'simd/i386/jsimd.c')
-rw-r--r--simd/i386/jsimd.c107
1 files changed, 83 insertions, 24 deletions
diff --git a/simd/i386/jsimd.c b/simd/i386/jsimd.c
index 563949a0..b429b0a5 100644
--- a/simd/i386/jsimd.c
+++ b/simd/i386/jsimd.c
@@ -2,8 +2,8 @@
* jsimd_i386.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +21,6 @@
#include "../../jdct.h"
#include "../../jsimddct.h"
#include "../jsimd.h"
-#include "jconfigint.h"
/*
* In the PIC cases, we have no guarantee that constants will keep
@@ -32,19 +31,17 @@
#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
/*
* Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
*/
LOCAL(void)
init_simd(void)
{
#ifndef NO_GETENV
- char *env = NULL;
+ char env[2] = { 0 };
#endif
if (simd_support != ~0U)
@@ -54,26 +51,19 @@ init_simd(void)
#ifndef NO_GETENV
/* Force different settings through environment variables */
- env = getenv("JSIMD_FORCEMMX");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCEMMX") && !strcmp(env, "1"))
simd_support &= JSIMD_MMX;
- env = getenv("JSIMD_FORCE3DNOW");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCE3DNOW") && !strcmp(env, "1"))
simd_support &= JSIMD_3DNOW | JSIMD_MMX;
- env = getenv("JSIMD_FORCESSE");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCESSE") && !strcmp(env, "1"))
simd_support &= JSIMD_SSE | JSIMD_MMX;
- env = getenv("JSIMD_FORCESSE2");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
simd_support &= JSIMD_SSE2;
- env = getenv("JSIMD_FORCEAVX2");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
simd_support &= JSIMD_AVX2;
- env = getenv("JSIMD_FORCENONE");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
simd_support = 0;
- env = getenv("JSIMD_NOHUFFENC");
- if ((env != NULL) && (strcmp(env, "1") == 0))
+ if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
simd_huffman = 0;
#endif
}
@@ -168,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -227,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -286,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -389,6 +388,9 @@ GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -409,6 +411,9 @@ GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -471,6 +476,9 @@ GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -486,6 +494,9 @@ GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -547,6 +558,9 @@ GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -565,6 +579,9 @@ GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -633,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -691,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -795,6 +818,9 @@ GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_convsamp_avx2(sample_data, start_col, workspace);
else if (simd_support & JSIMD_SSE2)
@@ -807,6 +833,9 @@ GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_SSE2)
jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
else if (simd_support & JSIMD_SSE)
@@ -877,6 +906,9 @@ jsimd_can_fdct_float(void)
GLOBAL(void)
jsimd_fdct_islow(DCTELEM *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_fdct_islow_avx2(data);
else if (simd_support & JSIMD_SSE2)
@@ -888,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
GLOBAL(void)
jsimd_fdct_ifast(DCTELEM *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
jsimd_fdct_ifast_sse2(data);
else
@@ -897,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
jsimd_fdct_float_sse(data);
else if (simd_support & JSIMD_3DNOW)
@@ -952,6 +990,9 @@ jsimd_can_quantize_float(void)
GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_quantize_avx2(coef_block, divisors, workspace);
else if (simd_support & JSIMD_SSE2)
@@ -964,6 +1005,9 @@ GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
FAST_FLOAT *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_SSE2)
jsimd_quantize_float_sse2(coef_block, divisors, workspace);
else if (simd_support & JSIMD_SSE)
@@ -1027,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1039,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1133,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1149,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1162,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1219,7 +1278,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *values, size_t *zerobits)
+ int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1245,7 +1304,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *absvalues, size_t *bits)
+ int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start,